Diffstat (limited to 'contrib/llvm/lib/Transforms')
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 31
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp | 11
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp | 138
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp | 54
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 437
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp | 3
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp | 514
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/Inliner.cpp | 27
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/Internalize.cpp | 136
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp | 77
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 62
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PruneEH.cpp | 14
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp | 231
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombine.h | 16
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 49
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 263
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 46
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 168
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 392
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 30
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 385
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 14
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 15
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 3
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 321
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h | 9
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 114
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 366
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp | 126
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 1397
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/DebugIR.cpp | 618
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/DebugIR.h | 99
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp | 117
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp | 93
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 537
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp | 225
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp | 1424
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp | 169
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h | 36
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 18
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h | 186
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h | 2
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h | 6
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h | 2
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp | 110
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 760
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ADCE.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp | 152
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp | 105
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 10
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp | 79
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GVN.cpp | 252
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp | 18
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 91
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp | 89
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp | 10
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 74
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp | 1184
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 136
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 42
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 229
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 23
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 156
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp | 41
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SCCP.cpp | 14
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SROA.cpp | 2802
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp | 479
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalar.cpp | 16
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp | 51
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 194
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp | 247
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 906
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp | 170
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 175
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp | 43
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp | 5
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp | 7
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp | 1
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp | 486
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp | 183
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp | 3
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LCSSA.cpp | 15
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Local.cpp | 473
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp | 23
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp | 8
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp | 21
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp | 62
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp | 18
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 1082
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp | 50
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 311
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 323
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp | 222
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp | 29
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp | 90
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2795
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2529
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp | 730
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/VecUtils.h | 164
109 files changed, 17921 insertions, 9380 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index e6fa4ed..df08091 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -88,7 +88,7 @@ char ArgPromotion::ID = 0;
INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
@@ -126,12 +126,10 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
if (!F || !F->hasLocalLinkage()) return 0;
// First check: see if there are any pointer arguments! If not, quick exit.
- SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs;
- unsigned ArgNo = 0;
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I, ++ArgNo)
+ SmallVector<Argument*, 16> PointerArgs;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
if (I->getType()->isPointerTy())
- PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
+ PointerArgs.push_back(I);
if (PointerArgs.empty()) return 0;
// Second check: make sure that all callers are direct callers. We can't
@@ -152,15 +150,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// add it to ArgsToPromote.
SmallPtrSet<Argument*, 8> ArgsToPromote;
SmallPtrSet<Argument*, 8> ByValArgsToTransform;
- for (unsigned i = 0; i != PointerArgs.size(); ++i) {
- bool isByVal=F->getAttributes().
- hasAttribute(PointerArgs[i].second+1, Attribute::ByVal);
- Argument *PtrArg = PointerArgs[i].first;
+ for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) {
+ Argument *PtrArg = PointerArgs[i];
Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
// If this is a byval argument, and if the aggregate type is small, just
// pass the elements, which is always safe.
- if (isByVal) {
+ if (PtrArg->hasByValAttr()) {
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
if (maxElements > 0 && STy->getNumElements() > maxElements) {
DEBUG(dbgs() << "argpromotion disable promoting argument '"
@@ -205,7 +201,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
}
// Otherwise, see if we can promote the pointer to its value.
- if (isSafeToPromoteArgument(PtrArg, isByVal))
+ if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr()))
ArgsToPromote.insert(PtrArg);
}
@@ -221,8 +217,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
Function *Callee = Arg->getParent();
- unsigned ArgNo = std::distance(Callee->arg_begin(),
- Function::arg_iterator(Arg));
+ unsigned ArgNo = Arg->getArgNo();
// Look at all call sites of the function. At this pointer we know we only
// have direct callees.
@@ -509,7 +504,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// OriginalLoads - Keep track of a representative load instruction from the
// original function so that we can tell the alias analysis implementation
// what the new GEP/Load instructions we are inserting look like.
- std::map<IndicesVector, LoadInst*> OriginalLoads;
+ // We need to keep the original loads for each argument and the elements
+ // of the argument that are accessed.
+ std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads;
// Attribute - Keep track of the parameter attributes for the arguments
// that we are *not* promoting. For the ones that we do promote, the parameter
@@ -574,7 +571,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
else
// Take any load, we will use it only to update Alias Analysis
OrigLoad = cast<LoadInst>(User->use_back());
- OriginalLoads[Indices] = OrigLoad;
+ OriginalLoads[std::make_pair(I, Indices)] = OrigLoad;
}
// Add a parameter to the function for each element passed in.
@@ -681,7 +678,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
Value *V = *AI;
- LoadInst *OrigLoad = OriginalLoads[*SI];
+ LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, *SI)];
if (!SI->empty()) {
Ops.reserve(SI->size());
Type *ElTy = V->getType();
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index a7bf188..d94c0f4 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -93,9 +93,12 @@ bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const {
}
unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
+ unsigned Align = GV->getAlignment();
+ if (Align)
+ return Align;
if (TD)
return TD->getPreferredAlignment(GV);
- return GV->getAlignment();
+ return 0;
}
bool ConstantMerge::runOnModule(Module &M) {
@@ -210,9 +213,9 @@ bool ConstantMerge::runOnModule(Module &M) {
// Bump the alignment if necessary.
if (Replacements[i].first->getAlignment() ||
Replacements[i].second->getAlignment()) {
- Replacements[i].second->setAlignment(std::max(
- Replacements[i].first->getAlignment(),
- Replacements[i].second->getAlignment()));
+ Replacements[i].second->setAlignment(
+ std::max(getAlignment(Replacements[i].first),
+ getAlignment(Replacements[i].second)));
}
// Eliminate any uses of the dead global.
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 49ef1e7..911c14e 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -211,7 +211,9 @@ void DAE::CollectFunctionDIs(Module &M) {
for (unsigned SPIndex = 0, SPNum = SPs.getNumElements();
SPIndex < SPNum; ++SPIndex) {
DISubprogram SP(SPs.getElement(SPIndex));
- if (!SP.Verify())
+ assert((!SP || SP.isSubprogram()) &&
+ "A MDNode in subprograms of a CU should be null or a DISubprogram.");
+ if (!SP)
continue;
if (Function *F = SP.getFunction())
FunctionDIs[F] = SP;
@@ -263,8 +265,10 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// to pass in a smaller number of arguments into the new function.
//
std::vector<Value*> Args;
- while (!Fn.use_empty()) {
- CallSite CS(Fn.use_back());
+ for (Value::use_iterator I = Fn.use_begin(), E = Fn.use_end(); I != E; ) {
+ CallSite CS(*I++);
+ if (!CS)
+ continue;
Instruction *Call = CS.getInstruction();
// Pass all the same arguments.
@@ -330,6 +334,11 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
if (DI != FunctionDIs.end())
DI->second.replaceFunction(NF);
+ // Fix up any BlockAddresses that refer to the function.
+ Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType()));
+ // Delete the bitcast that we just created, so that NF does not
+ // appear to be address-taken.
+ NF->removeDeadConstantUsers();
// Finally, nuke the old function.
Fn.eraseFromParent();
return true;
@@ -343,8 +352,22 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
if (Fn.isDeclaration() || Fn.mayBeOverridden())
return false;
- // Functions with local linkage should already have been handled.
- if (Fn.hasLocalLinkage())
+ // Functions with local linkage should already have been handled, except the
+ // fragile (variadic) ones which we can improve here.
+ if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
+ return false;
+
+ // If a function seen at compile time is not necessarily the one linked to
+ // the binary being built, it is illegal to change the actual arguments
+ // passed to it. These functions can be captured by isWeakForLinker().
+ // *NOTE* that mayBeOverridden() is insufficient for this purpose as it
+ // doesn't include linkage types like AvailableExternallyLinkage and
+ // LinkOnceODRLinkage. Take link_odr* as an example, it indicates a set of
+ // *EQUIVALENT* globals that can be merged at link-time. However, the
+ // semantic of *EQUIVALENT*-functions includes parameters. Changing
+ // parameters breaks this assumption.
+ //
+ if (Fn.isWeakForLinker())
return false;
if (Fn.use_empty())
@@ -604,9 +627,20 @@ void DAE::SurveyFunction(const Function &F) {
UseVector MaybeLiveArgUses;
for (Function::const_arg_iterator AI = F.arg_begin(),
E = F.arg_end(); AI != E; ++AI, ++i) {
- // See what the effect of this use is (recording any uses that cause
- // MaybeLive in MaybeLiveArgUses).
- Liveness Result = SurveyUses(AI, MaybeLiveArgUses);
+ Liveness Result;
+ if (F.getFunctionType()->isVarArg()) {
+ // Variadic functions will already have a va_arg function expanded inside
+ // them, making them potentially very sensitive to ABI changes resulting
+ // from removing arguments entirely, so don't. For example AArch64 handles
+ // register and stack HFAs very differently, and this is reflected in the
+ // IR which has already been generated.
+ Result = Live;
+ } else {
+ // See what the effect of this use is (recording any uses that cause
+ // MaybeLive in MaybeLiveArgUses).
+ Result = SurveyUses(AI, MaybeLiveArgUses);
+ }
+
// Mark the result.
MarkValue(CreateArg(&F, i), Result, MaybeLiveArgUses);
// Clear the vector again for the next iteration.
@@ -695,10 +729,42 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
FunctionType *FTy = F->getFunctionType();
std::vector<Type*> Params;
+ // Keep track of if we have a live 'returned' argument
+ bool HasLiveReturnedArg = false;
+
// Set up to build a new list of parameter attributes.
SmallVector<AttributeSet, 8> AttributesVec;
const AttributeSet &PAL = F->getAttributes();
+ // Remember which arguments are still alive.
+ SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
+ // Construct the new parameter list from non-dead arguments. Also construct
+ // a new set of parameter attributes to correspond. Skip the first parameter
+ // attribute, since that belongs to the return value.
+ unsigned i = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++i) {
+ RetOrArg Arg = CreateArg(F, i);
+ if (LiveValues.erase(Arg)) {
+ Params.push_back(I->getType());
+ ArgAlive[i] = true;
+
+ // Get the original parameter attributes (skipping the first one, that is
+ // for the return value.
+ if (PAL.hasAttributes(i + 1)) {
+ AttrBuilder B(PAL, i + 1);
+ if (B.contains(Attribute::Returned))
+ HasLiveReturnedArg = true;
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Params.size(), B));
+ }
+ } else {
+ ++NumArgumentsEliminated;
+ DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName()
+ << ") from " << F->getName() << "\n");
+ }
+ }
+
// Find out the new return value.
Type *RetTy = FTy->getReturnType();
Type *NRetTy = NULL;
@@ -707,7 +773,27 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// -1 means unused, other numbers are the new index
SmallVector<int, 5> NewRetIdxs(RetCount, -1);
std::vector<Type*> RetTypes;
- if (RetTy->isVoidTy()) {
+
+ // If there is a function with a live 'returned' argument but a dead return
+ // value, then there are two possible actions:
+ // 1) Eliminate the return value and take off the 'returned' attribute on the
+ // argument.
+ // 2) Retain the 'returned' attribute and treat the return value (but not the
+ // entire function) as live so that it is not eliminated.
+ //
+ // It's not clear in the general case which option is more profitable because,
+ // even in the absence of explicit uses of the return value, code generation
+ // is free to use the 'returned' attribute to do things like eliding
+ // save/restores of registers across calls. Whether or not this happens is
+ // target and ABI-specific as well as depending on the amount of register
+ // pressure, so there's no good way for an IR-level pass to figure this out.
+ //
+ // Fortunately, the only places where 'returned' is currently generated by
+ // the FE are places where 'returned' is basically free and almost always a
+ // performance win, so the second option can just be used always for now.
+ //
+ // This should be revisited if 'returned' is ever applied more liberally.
+ if (RetTy->isVoidTy() || HasLiveReturnedArg) {
NRetTy = RetTy;
} else {
StructType *STy = dyn_cast<StructType>(RetTy);
@@ -777,33 +863,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
AttributesVec.push_back(AttributeSet::get(NRetTy->getContext(), RAttrs));
- // Remember which arguments are still alive.
- SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
- // Construct the new parameter list from non-dead arguments. Also construct
- // a new set of parameter attributes to correspond. Skip the first parameter
- // attribute, since that belongs to the return value.
- unsigned i = 0;
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I, ++i) {
- RetOrArg Arg = CreateArg(F, i);
- if (LiveValues.erase(Arg)) {
- Params.push_back(I->getType());
- ArgAlive[i] = true;
-
- // Get the original parameter attributes (skipping the first one, that is
- // for the return value.
- if (PAL.hasAttributes(i + 1)) {
- AttrBuilder B(PAL, i + 1);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Params.size(), B));
- }
- } else {
- ++NumArgumentsEliminated;
- DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName()
- << ") from " << F->getName() << "\n");
- }
- }
-
if (PAL.hasAttributes(AttributeSet::FunctionIndex))
AttributesVec.push_back(AttributeSet::get(F->getContext(),
PAL.getFnAttributes()));
@@ -864,6 +923,13 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Get original parameter attributes, but skip return attributes.
if (CallPAL.hasAttributes(i + 1)) {
AttrBuilder B(CallPAL, i + 1);
+ // If the return type has changed, then get rid of 'returned' on the
+ // call site. The alternative is to make all 'returned' attributes on
+ // call sites keep the return value alive just like 'returned'
+ // attributes on function declaration but it's less clearly a win
+ // and this is not an expected case anyway
+ if (NRetTy != RetTy && B.contains(Attribute::Returned))
+ B.removeAttribute(Attribute::Returned);
AttributesVec.
push_back(AttributeSet::get(F->getContext(), Args.size(), B));
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
index fa3d72d..50fb3e6 100644
--- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -21,6 +21,38 @@
#include <algorithm>
using namespace llvm;
+/// Make sure GV is visible from both modules. Delete is true if it is
+/// being deleted from this module.
+/// This also makes sure GV cannot be dropped so that references from
+/// the split module remain valid.
+static void makeVisible(GlobalValue &GV, bool Delete) {
+ bool Local = GV.hasLocalLinkage();
+ if (Local)
+ GV.setVisibility(GlobalValue::HiddenVisibility);
+
+ if (Local || Delete) {
+ GV.setLinkage(GlobalValue::ExternalLinkage);
+ return;
+ }
+
+ if (!GV.hasLinkOnceLinkage()) {
+ assert(!GV.isDiscardableIfUnused());
+ return;
+ }
+
+ // Map linkonce* to weak* so that llvm doesn't drop this GV.
+ switch(GV.getLinkage()) {
+ default:
+ llvm_unreachable("Unexpected linkage");
+ case GlobalValue::LinkOnceAnyLinkage:
+ GV.setLinkage(GlobalValue::WeakAnyLinkage);
+ return;
+ case GlobalValue::LinkOnceODRLinkage:
+ GV.setLinkage(GlobalValue::WeakODRLinkage);
+ return;
+ }
+}
+
namespace {
/// @brief A pass to extract specific functions and their dependencies.
class GVExtractorPass : public ModulePass {
@@ -60,12 +92,7 @@ namespace {
continue;
}
- bool Local = I->isDiscardableIfUnused();
- if (Local)
- I->setVisibility(GlobalValue::HiddenVisibility);
-
- if (Local || Delete)
- I->setLinkage(GlobalValue::ExternalLinkage);
+ makeVisible(*I, Delete);
if (Delete)
I->setInitializer(0);
@@ -80,12 +107,7 @@ namespace {
continue;
}
- bool Local = I->isDiscardableIfUnused();
- if (Local)
- I->setVisibility(GlobalValue::HiddenVisibility);
-
- if (Local || Delete)
- I->setLinkage(GlobalValue::ExternalLinkage);
+ makeVisible(*I, Delete);
if (Delete)
I->deleteBody();
@@ -97,12 +119,10 @@ namespace {
Module::alias_iterator CurI = I;
++I;
- if (CurI->isDiscardableIfUnused()) {
- CurI->setVisibility(GlobalValue::HiddenVisibility);
- CurI->setLinkage(GlobalValue::ExternalLinkage);
- }
+ bool Delete = deleteStuff == (bool)Named.count(CurI);
+ makeVisible(*CurI, Delete);
- if (deleteStuff == (bool)Named.count(CurI)) {
+ if (Delete) {
Type *Ty = CurI->getType()->getElementType();
CurI->removeFromParent();
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index bc5109b..60e5f06 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -9,14 +9,12 @@
//
// This file implements a simple interprocedural pass which walks the
// call-graph, looking for functions which do not access or only read
-// non-local memory, and marking them readnone/readonly. In addition,
-// it marks function arguments (of pointer type) 'nocapture' if a call
-// to the function does not create any copies of the pointer value that
-// outlive the call. This more or less means that the pointer is only
-// dereferenced, and not returned from the function or stored in a global.
-// Finally, well-known library call declarations are marked with all
-// attributes that are consistent with the function's standard definition.
-// This pass is implemented as a bottom-up traversal of the call-graph.
+// non-local memory, and marking them readnone/readonly. It does the
+// same with function arguments independently, marking them readonly/
+// readnone/nocapture. Finally, well-known library call declarations
+// are marked with all attributes that are consistent with the
+// function's standard definition. This pass is implemented as a
+// bottom-up traversal of the call-graph.
//
//===----------------------------------------------------------------------===//
@@ -40,6 +38,8 @@ using namespace llvm;
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
+STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
+STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumAnnotated, "Number of attributes added to library functions");
@@ -56,8 +56,8 @@ namespace {
// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
bool AddReadAttrs(const CallGraphSCC &SCC);
- // AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
- bool AddNoCaptureAttrs(const CallGraphSCC &SCC);
+ // AddArgumentAttrs - Deduce nocapture attributes for the SCC.
+ bool AddArgumentAttrs(const CallGraphSCC &SCC);
// IsFunctionMallocLike - Does this function allocate new memory?
bool IsFunctionMallocLike(Function *F,
@@ -71,36 +71,43 @@ namespace {
void setDoesNotAccessMemory(Function &F) {
if (!F.doesNotAccessMemory()) {
- F.setDoesNotAccessMemory();
- ++NumAnnotated;
+ F.setDoesNotAccessMemory();
+ ++NumAnnotated;
}
}
void setOnlyReadsMemory(Function &F) {
if (!F.onlyReadsMemory()) {
- F.setOnlyReadsMemory();
- ++NumAnnotated;
+ F.setOnlyReadsMemory();
+ ++NumAnnotated;
}
}
void setDoesNotThrow(Function &F) {
if (!F.doesNotThrow()) {
- F.setDoesNotThrow();
- ++NumAnnotated;
+ F.setDoesNotThrow();
+ ++NumAnnotated;
}
}
void setDoesNotCapture(Function &F, unsigned n) {
if (!F.doesNotCapture(n)) {
- F.setDoesNotCapture(n);
- ++NumAnnotated;
+ F.setDoesNotCapture(n);
+ ++NumAnnotated;
+ }
+ }
+
+ void setOnlyReadsMemory(Function &F, unsigned n) {
+ if (!F.onlyReadsMemory(n)) {
+ F.setOnlyReadsMemory(n);
+ ++NumAnnotated;
}
}
void setDoesNotAlias(Function &F, unsigned n) {
if (!F.doesNotAlias(n)) {
- F.setDoesNotAlias(n);
- ++NumAnnotated;
+ F.setDoesNotAlias(n);
+ ++NumAnnotated;
}
}
@@ -129,7 +136,8 @@ namespace {
char FunctionAttrs::ID = 0;
INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
@@ -343,6 +351,7 @@ namespace {
Function *F = CS.getCalledFunction();
if (!F || !SCCNodes.count(F)) { Captured = true; return true; }
+ bool Found = false;
Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
for (CallSite::arg_iterator PI = CS.arg_begin(), PE = CS.arg_end();
PI != PE; ++PI, ++AI) {
@@ -353,10 +362,12 @@ namespace {
}
if (PI == U) {
Uses.push_back(AI);
+ Found = true;
break;
}
}
- assert(!Uses.empty() && "Capturing call-site captured nothing?");
+ assert(Found && "Capturing call-site captured nothing?");
+ (void)Found;
return false;
}
@@ -394,8 +405,100 @@ namespace llvm {
};
}
-/// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
-bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
+// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
+static Attribute::AttrKind
+determinePointerReadAttrs(Argument *A,
+ const SmallPtrSet<Argument*, 8> &SCCNodes) {
+
+ SmallVector<Use*, 32> Worklist;
+ SmallSet<Use*, 32> Visited;
+ int Count = 0;
+
+ bool IsRead = false;
+ // We don't need to track IsWritten. If A is written to, return immediately.
+
+ for (Value::use_iterator UI = A->use_begin(), UE = A->use_end();
+ UI != UE; ++UI) {
+ if (Count++ >= 20)
+ return Attribute::None;
+
+ Use *U = &UI.getUse();
+ Visited.insert(U);
+ Worklist.push_back(U);
+ }
+
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+ Instruction *I = cast<Instruction>(U->getUser());
+ Value *V = U->get();
+
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::PHI:
+ case Instruction::Select:
+ // The original value is not read/written via this if the new value isn't.
+ for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ Use *U = &UI.getUse();
+ if (Visited.insert(U))
+ Worklist.push_back(U);
+ }
+ break;
+
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(I);
+ if (CS.doesNotAccessMemory())
+ continue;
+
+ Function *F = CS.getCalledFunction();
+ if (!F) {
+ if (CS.onlyReadsMemory()) {
+ IsRead = true;
+ continue;
+ }
+ return Attribute::None;
+ }
+
+ Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+ for (CallSite::arg_iterator A = B; A != E; ++A, ++AI) {
+ if (A->get() == V) {
+ if (AI == AE) {
+ assert(F->isVarArg() &&
+ "More params than args in non-varargs call.");
+ return Attribute::None;
+ }
+ if (SCCNodes.count(AI))
+ continue;
+ if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(A - B))
+ return Attribute::None;
+ if (!CS.doesNotAccessMemory(A - B))
+ IsRead = true;
+ }
+ }
+ break;
+ }
+
+ case Instruction::Load:
+ IsRead = true;
+ break;
+
+ case Instruction::ICmp:
+ case Instruction::Ret:
+ break;
+
+ default:
+ return Attribute::None;
+ }
+ }
+
+ return IsRead ? Attribute::ReadOnly : Attribute::ReadNone;
+}
+
+/// AddArgumentAttrs - Deduce nocapture attributes for the SCC.
+bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
bool Changed = false;
SmallPtrSet<Function*, 8> SCCNodes;
@@ -442,8 +545,11 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
continue;
}
- for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A)
- if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
+ A != E; ++A) {
+ if (!A->getType()->isPointerTy()) continue;
+ bool HasNonLocalUses = false;
+ if (!A->hasNoCaptureAttr()) {
ArgumentUsesTracker Tracker(SCCNodes);
PointerMayBeCaptured(A, &Tracker);
if (!Tracker.Captured) {
@@ -458,12 +564,32 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
// its particulars for Argument-SCC analysis later.
ArgumentGraphNode *Node = AG[A];
for (SmallVectorImpl<Argument*>::iterator UI = Tracker.Uses.begin(),
- UE = Tracker.Uses.end(); UI != UE; ++UI)
+ UE = Tracker.Uses.end(); UI != UE; ++UI) {
Node->Uses.push_back(AG[*UI]);
+ if (*UI != A)
+ HasNonLocalUses = true;
+ }
}
}
// Otherwise, it's captured. Don't bother doing SCC analysis on it.
}
+ if (!HasNonLocalUses && !A->onlyReadsMemory()) {
+ // Can we determine that it's readonly/readnone without doing an SCC?
+ // Note that we don't allow any calls at all here, or else our result
+ // will be dependent on the iteration order through the functions in the
+ // SCC.
+ SmallPtrSet<Argument*, 8> Self;
+ Self.insert(A);
+ Attribute::AttrKind R = determinePointerReadAttrs(A, Self);
+ if (R != Attribute::None) {
+ AttrBuilder B;
+ B.addAttribute(R);
+ A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ Changed = true;
+ R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
+ }
+ }
+ }
}
// The graph we've collected is partial because we stopped scanning for
@@ -482,11 +608,8 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
// eg. "void f(int* x) { if (...) f(x); }"
if (ArgumentSCC[0]->Uses.size() == 1 &&
ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
- ArgumentSCC[0]->
- Definition->
- addAttr(AttributeSet::get(ArgumentSCC[0]->Definition->getContext(),
- ArgumentSCC[0]->Definition->getArgNo() + 1,
- B));
+ Argument *A = ArgumentSCC[0]->Definition;
+ A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
}
@@ -532,6 +655,42 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
++NumNoCapture;
Changed = true;
}
+
+ // We also want to compute readonly/readnone. With a small number of false
+ // negatives, we can assume that any pointer which is captured isn't going
+ // to be provably readonly or readnone, since by definition we can't
+ // analyze all uses of a captured pointer.
+ //
+ // The false negatives happen when the pointer is captured by a function
+ // that promises readonly/readnone behaviour on the pointer, then the
+ // pointer's lifetime ends before anything that writes to arbitrary memory.
+ // Also, a readonly/readnone pointer may be returned, but returning a
+ // pointer is capturing it.
+
+ Attribute::AttrKind ReadAttr = Attribute::ReadNone;
+ for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
+ Argument *A = ArgumentSCC[i]->Definition;
+ Attribute::AttrKind K = determinePointerReadAttrs(A, ArgumentSCCNodes);
+ if (K == Attribute::ReadNone)
+ continue;
+ if (K == Attribute::ReadOnly) {
+ ReadAttr = Attribute::ReadOnly;
+ continue;
+ }
+ ReadAttr = K;
+ break;
+ }
+
+ if (ReadAttr != Attribute::None) {
+ AttrBuilder B;
+ B.addAttribute(ReadAttr);
+ for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
+ Argument *A = ArgumentSCC[i]->Definition;
+ A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
+ Changed = true;
+ }
+ }
}
return Changed;
@@ -678,24 +837,32 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setOnlyReadsMemory(F);
setDoesNotThrow(F);
break;
- case LibFunc::strcpy:
- case LibFunc::stpcpy:
- case LibFunc::strcat:
case LibFunc::strtol:
case LibFunc::strtod:
case LibFunc::strtof:
case LibFunc::strtoul:
case LibFunc::strtoll:
case LibFunc::strtold:
+ case LibFunc::strtoull:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
+ break;
+ case LibFunc::strcpy:
+ case LibFunc::stpcpy:
+ case LibFunc::strcat:
case LibFunc::strncat:
case LibFunc::strncpy:
case LibFunc::stpncpy:
- case LibFunc::strtoull:
if (FTy->getNumParams() < 2 ||
!FTy->getParamType(1)->isPointerTy())
return false;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::strxfrm:
if (FTy->getNumParams() != 3 ||
@@ -705,14 +872,15 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
- break;
- case LibFunc::strcmp:
- case LibFunc::strspn:
- case LibFunc::strncmp:
- case LibFunc::strcspn:
- case LibFunc::strcoll:
- case LibFunc::strcasecmp:
- case LibFunc::strncasecmp:
+ setOnlyReadsMemory(F, 2);
+ break;
+ case LibFunc::strcmp: //0,1
+ case LibFunc::strspn: // 0,1
+ case LibFunc::strncmp: // 0,1
+ case LibFunc::strcspn: //0,1
+ case LibFunc::strcoll: //0,1
+ case LibFunc::strcasecmp: // 0,1
+ case LibFunc::strncasecmp: //
if (FTy->getNumParams() < 2 ||
!FTy->getParamType(0)->isPointerTy() ||
!FTy->getParamType(1)->isPointerTy())
@@ -736,8 +904,15 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
return false;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::scanf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
+ break;
case LibFunc::setbuf:
case LibFunc::setvbuf:
if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
@@ -753,11 +928,31 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::stat:
+ case LibFunc::statvfs:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
+ break;
case LibFunc::sscanf:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
+ setOnlyReadsMemory(F, 2);
+ break;
case LibFunc::sprintf:
- case LibFunc::statvfs:
if (FTy->getNumParams() < 2 ||
!FTy->getParamType(0)->isPointerTy() ||
!FTy->getParamType(1)->isPointerTy())
@@ -765,6 +960,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::snprintf:
if (FTy->getNumParams() != 3 ||
@@ -774,6 +970,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 3);
+ setOnlyReadsMemory(F, 3);
break;
case LibFunc::setitimer:
if (FTy->getNumParams() != 3 ||
@@ -783,6 +980,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
setDoesNotCapture(F, 3);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::system:
if (FTy->getNumParams() != 1 ||
@@ -790,6 +988,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
return false;
// May throw; "system" is a valid pthread cancellation point.
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::malloc:
if (FTy->getNumParams() != 1 ||
@@ -818,6 +1017,12 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
case LibFunc::modf:
case LibFunc::modff:
case LibFunc::modfl:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
case LibFunc::memcpy:
case LibFunc::memccpy:
case LibFunc::memmove:
@@ -826,6 +1031,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
return false;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::memalign:
if (!FTy->getReturnType()->isPointerTy())
@@ -833,6 +1039,13 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotAlias(F, 0);
break;
case LibFunc::mkdir:
+ if (FTy->getNumParams() == 0 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
+ break;
case LibFunc::mktime:
if (FTy->getNumParams() == 0 ||
!FTy->getParamType(0)->isPointerTy())
@@ -856,8 +1069,14 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
// May throw; "read" is a valid pthread cancellation point.
setDoesNotCapture(F, 2);
break;
- case LibFunc::rmdir:
case LibFunc::rewind:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::rmdir:
case LibFunc::remove:
case LibFunc::realpath:
if (FTy->getNumParams() < 1 ||
@@ -865,8 +1084,19 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
return false;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::rename:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
+ setOnlyReadsMemory(F, 2);
+ break;
case LibFunc::readlink:
if (FTy->getNumParams() < 2 ||
!FTy->getParamType(0)->isPointerTy() ||
@@ -875,12 +1105,14 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::write:
if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
return false;
// May throw; "write" is a valid pthread cancellation point.
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::bcopy:
if (FTy->getNumParams() != 3 ||
@@ -890,6 +1122,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::bcmp:
if (FTy->getNumParams() != 3 ||
@@ -916,6 +1149,12 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
break;
case LibFunc::chmod:
case LibFunc::chown:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
+ break;
case LibFunc::ctermid:
case LibFunc::clearerr:
case LibFunc::closedir:
@@ -939,6 +1178,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
return false;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::fopen:
if (FTy->getNumParams() != 2 ||
@@ -950,6 +1190,8 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::fdopen:
if (FTy->getNumParams() != 2 ||
@@ -959,6 +1201,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::feof:
case LibFunc::free:
@@ -1004,7 +1247,16 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
return false;
setDoesNotThrow(F);
setDoesNotCapture(F, 3);
+ break;
case LibFunc::fread:
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 4);
+ break;
case LibFunc::fwrite:
if (FTy->getNumParams() != 4 ||
!FTy->getParamType(0)->isPointerTy() ||
@@ -1013,9 +1265,28 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 4);
+ break;
case LibFunc::fputs:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
+ break;
case LibFunc::fscanf:
case LibFunc::fprintf:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 2);
+ break;
case LibFunc::fgetpos:
if (FTy->getNumParams() < 2 ||
!FTy->getParamType(0)->isPointerTy() ||
@@ -1055,6 +1326,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
return false;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::ungetc:
if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
@@ -1063,12 +1335,24 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotCapture(F, 2);
break;
case LibFunc::uname:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
case LibFunc::unlink:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
+ break;
case LibFunc::unsetenv:
if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
return false;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::utime:
case LibFunc::utimes:
@@ -1079,6 +1363,8 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::putc:
if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
@@ -1093,13 +1379,20 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
return false;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::pread:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "pread" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ break;
case LibFunc::pwrite:
if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
return false;
- // May throw; these are valid pthread cancellation points.
+ // May throw; "pwrite" is a valid pthread cancellation point.
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::putchar:
setDoesNotThrow(F);
@@ -1114,6 +1407,8 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::pclose:
if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
@@ -1126,8 +1421,19 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
return false;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::vsscanf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
+ setOnlyReadsMemory(F, 2);
+ break;
case LibFunc::vfscanf:
if (FTy->getNumParams() != 3 ||
!FTy->getParamType(1)->isPointerTy() ||
@@ -1136,6 +1442,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::valloc:
if (!FTy->getReturnType()->isPointerTy())
@@ -1148,6 +1455,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
return false;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::vfprintf:
case LibFunc::vsprintf:
@@ -1158,6 +1466,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::vsnprintf:
if (FTy->getNumParams() != 4 ||
@@ -1167,12 +1476,14 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 3);
+ setOnlyReadsMemory(F, 3);
break;
case LibFunc::open:
if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
return false;
// May throw; "open" is a valid pthread cancellation point.
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::opendir:
if (FTy->getNumParams() != 1 ||
@@ -1182,6 +1493,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::tmpfile:
if (!FTy->getReturnType()->isPointerTy())
@@ -1210,12 +1522,14 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::lchown:
if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
return false;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::qsort:
if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
@@ -1232,6 +1546,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::dunder_strtok_r:
if (FTy->getNumParams() != 3 ||
@@ -1239,6 +1554,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
return false;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::under_IO_getc:
if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
@@ -1258,10 +1574,20 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
return false;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
case LibFunc::stat64:
case LibFunc::lstat64:
case LibFunc::statvfs64:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
+ break;
case LibFunc::dunder_isoc99_sscanf:
if (FTy->getNumParams() < 1 ||
!FTy->getParamType(0)->isPointerTy() ||
@@ -1270,6 +1596,8 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::fopen64:
if (FTy->getNumParams() != 2 ||
@@ -1281,6 +1609,8 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
+ setOnlyReadsMemory(F, 1);
+ setOnlyReadsMemory(F, 2);
break;
case LibFunc::fseeko64:
case LibFunc::ftello64:
@@ -1307,7 +1637,18 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
return false;
// May throw; "open" is a valid pthread cancellation point.
setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F, 1);
break;
+ case LibFunc::gettimeofday:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // Currently some platforms have the restrict keyword on the arguments to
+ // gettimeofday. To be conservative, do not add noalias to gettimeofday's
+ // arguments.
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
default:
// Didn't mark any attributes.
return false;
@@ -1339,7 +1680,7 @@ bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
bool Changed = annotateLibraryCalls(SCC);
Changed |= AddReadAttrs(SCC);
- Changed |= AddNoCaptureAttrs(SCC);
+ Changed |= AddArgumentAttrs(SCC);
Changed |= AddNoAliasAttrs(SCC);
return Changed;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index 201f320..901295d 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -179,6 +179,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
// any globals used will be marked as needed.
Function *F = cast<Function>(G);
+ if (F->hasPrefixData())
+ MarkUsedGlobalsAsNeeded(F->getPrefixData());
+
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 0ef900e..2ea89a1 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -37,7 +37,10 @@
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
using namespace llvm;
@@ -59,7 +62,6 @@ STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
namespace {
- struct GlobalStatus;
struct GlobalOpt : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetLibraryInfo>();
@@ -79,7 +81,6 @@ namespace {
bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
- const SmallPtrSet<const PHINode*, 16> &PHIUsers,
const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
@@ -97,209 +98,6 @@ INITIALIZE_PASS_END(GlobalOpt, "globalopt",
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
-namespace {
-
-/// GlobalStatus - As we analyze each global, keep track of some information
-/// about it. If we find out that the address of the global is taken, none of
-/// this info will be accurate.
-struct GlobalStatus {
- /// isCompared - True if the global's address is used in a comparison.
- bool isCompared;
-
- /// isLoaded - True if the global is ever loaded. If the global isn't ever
- /// loaded it can be deleted.
- bool isLoaded;
-
- /// StoredType - Keep track of what stores to the global look like.
- ///
- enum StoredType {
- /// NotStored - There is no store to this global. It can thus be marked
- /// constant.
- NotStored,
-
- /// isInitializerStored - This global is stored to, but the only thing
- /// stored is the constant it was initialized with. This is only tracked
- /// for scalar globals.
- isInitializerStored,
-
- /// isStoredOnce - This global is stored to, but only its initializer and
- /// one other value is ever stored to it. If this global isStoredOnce, we
- /// track the value stored to it in StoredOnceValue below. This is only
- /// tracked for scalar globals.
- isStoredOnce,
-
- /// isStored - This global is stored to by multiple values or something else
- /// that we cannot track.
- isStored
- } StoredType;
-
- /// StoredOnceValue - If only one value (besides the initializer constant) is
- /// ever stored to this global, keep track of what value it is.
- Value *StoredOnceValue;
-
- /// AccessingFunction/HasMultipleAccessingFunctions - These start out
- /// null/false. When the first accessing function is noticed, it is recorded.
- /// When a second different accessing function is noticed,
- /// HasMultipleAccessingFunctions is set to true.
- const Function *AccessingFunction;
- bool HasMultipleAccessingFunctions;
-
- /// HasNonInstructionUser - Set to true if this global has a user that is not
- /// an instruction (e.g. a constant expr or GV initializer).
- bool HasNonInstructionUser;
-
- /// AtomicOrdering - Set to the strongest atomic ordering requirement.
- AtomicOrdering Ordering;
-
- GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
- StoredOnceValue(0), AccessingFunction(0),
- HasMultipleAccessingFunctions(false),
- HasNonInstructionUser(false), Ordering(NotAtomic) {}
-};
-
-}
-
-/// StrongerOrdering - Return the stronger of the two ordering. If the two
-/// orderings are acquire and release, then return AcquireRelease.
-///
-static AtomicOrdering StrongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
- if (X == Acquire && Y == Release) return AcquireRelease;
- if (Y == Acquire && X == Release) return AcquireRelease;
- return (AtomicOrdering)std::max(X, Y);
-}
-
-/// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
-/// by constants itself. Note that constants cannot be cyclic, so this test is
-/// pretty easy to implement recursively.
-///
-static bool SafeToDestroyConstant(const Constant *C) {
- if (isa<GlobalValue>(C)) return false;
-
- for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
- ++UI)
- if (const Constant *CU = dyn_cast<Constant>(*UI)) {
- if (!SafeToDestroyConstant(CU)) return false;
- } else
- return false;
- return true;
-}
-
-
-/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus
-/// structure. If the global has its address taken, return true to indicate we
-/// can't do anything with it.
-///
-static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
- SmallPtrSet<const PHINode*, 16> &PHIUsers) {
- for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
- ++UI) {
- const User *U = *UI;
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
- GS.HasNonInstructionUser = true;
-
- // If the result of the constantexpr isn't pointer type, then we won't
- // know to expect it in various places. Just reject early.
- if (!isa<PointerType>(CE->getType())) return true;
-
- if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
- } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
- if (!GS.HasMultipleAccessingFunctions) {
- const Function *F = I->getParent()->getParent();
- if (GS.AccessingFunction == 0)
- GS.AccessingFunction = F;
- else if (GS.AccessingFunction != F)
- GS.HasMultipleAccessingFunctions = true;
- }
- if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
- GS.isLoaded = true;
- // Don't hack on volatile loads.
- if (LI->isVolatile()) return true;
- GS.Ordering = StrongerOrdering(GS.Ordering, LI->getOrdering());
- } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
- // Don't allow a store OF the address, only stores TO the address.
- if (SI->getOperand(0) == V) return true;
-
- // Don't hack on volatile stores.
- if (SI->isVolatile()) return true;
-
- GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering());
-
- // If this is a direct store to the global (i.e., the global is a scalar
- // value, not an aggregate), keep more specific information about
- // stores.
- if (GS.StoredType != GlobalStatus::isStored) {
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
- SI->getOperand(1))) {
- Value *StoredVal = SI->getOperand(0);
-
- if (Constant *C = dyn_cast<Constant>(StoredVal)) {
- if (C->isThreadDependent()) {
- // The stored value changes between threads; don't track it.
- return true;
- }
- }
-
- if (StoredVal == GV->getInitializer()) {
- if (GS.StoredType < GlobalStatus::isInitializerStored)
- GS.StoredType = GlobalStatus::isInitializerStored;
- } else if (isa<LoadInst>(StoredVal) &&
- cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
- if (GS.StoredType < GlobalStatus::isInitializerStored)
- GS.StoredType = GlobalStatus::isInitializerStored;
- } else if (GS.StoredType < GlobalStatus::isStoredOnce) {
- GS.StoredType = GlobalStatus::isStoredOnce;
- GS.StoredOnceValue = StoredVal;
- } else if (GS.StoredType == GlobalStatus::isStoredOnce &&
- GS.StoredOnceValue == StoredVal) {
- // noop.
- } else {
- GS.StoredType = GlobalStatus::isStored;
- }
- } else {
- GS.StoredType = GlobalStatus::isStored;
- }
- }
- } else if (isa<BitCastInst>(I)) {
- if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
- } else if (isa<GetElementPtrInst>(I)) {
- if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
- } else if (isa<SelectInst>(I)) {
- if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
- } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
- // PHI nodes we can check just like select or GEP instructions, but we
- // have to be careful about infinite recursion.
- if (PHIUsers.insert(PN)) // Not already visited.
- if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
- } else if (isa<CmpInst>(I)) {
- GS.isCompared = true;
- } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
- if (MTI->isVolatile()) return true;
- if (MTI->getArgOperand(0) == V)
- GS.StoredType = GlobalStatus::isStored;
- if (MTI->getArgOperand(1) == V)
- GS.isLoaded = true;
- } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
- assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
- if (MSI->isVolatile()) return true;
- GS.StoredType = GlobalStatus::isStored;
- } else {
- return true; // Any other non-load instruction might take address!
- }
- } else if (const Constant *C = dyn_cast<Constant>(U)) {
- GS.HasNonInstructionUser = true;
- // We might have a dead and dangling constant hanging off of here.
- if (!SafeToDestroyConstant(C))
- return true;
- } else {
- GS.HasNonInstructionUser = true;
- // Otherwise must be some other user.
- return true;
- }
- }
-
- return false;
-}
-
/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
/// as a root? If so, we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
@@ -433,7 +231,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
Changed = true;
}
} else if (Constant *C = dyn_cast<Constant>(U)) {
- if (SafeToDestroyConstant(C)) {
+ if (isSafeToDestroyConstant(C)) {
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
Dead.clear();
@@ -470,9 +268,17 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
DataLayout *TD, TargetLibraryInfo *TLI) {
bool Changed = false;
- SmallVector<User*, 8> WorkList(V->use_begin(), V->use_end());
+ // Note that we need to use a weak value handle for the worklist items. When
+ // we delete a constant array, we may also be holding a pointer to one of its
+ // elements (or an element of one of its elements if we're dealing with an
+ // array of arrays) in the worklist.
+ SmallVector<WeakVH, 8> WorkList(V->use_begin(), V->use_end());
while (!WorkList.empty()) {
- User *U = WorkList.pop_back_val();
+ Value *UV = WorkList.pop_back_val();
+ if (!UV)
+ continue;
+
+ User *U = cast<User>(UV);
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
if (Init) {
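The comment above is the crux of this hunk. A standalone analogy of the weak-handle pattern, using std::weak_ptr in place of LLVM's WeakVH (purely illustrative; the pass itself uses value handles, not shared ownership):

    #include <iostream>
    #include <memory>
    #include <vector>

    int main() {
      auto A = std::make_shared<int>(1);
      auto B = std::make_shared<int>(2);
      std::vector<std::weak_ptr<int>> WorkList{A, B};

      // Simulate destroyConstant() invalidating an entry still on the worklist.
      B.reset();

      while (!WorkList.empty()) {
        std::weak_ptr<int> W = WorkList.back();
        WorkList.pop_back();
        if (std::shared_ptr<int> P = W.lock())  // same null-check as the WeakVH code
          std::cout << *P << '\n';              // only the surviving entry prints
      }
    }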
@@ -533,7 +339,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
} else if (Constant *C = dyn_cast<Constant>(U)) {
// If we have a chain of dead constantexprs or other things dangling from
// us, and if they are all dead, nuke them without remorse.
- if (SafeToDestroyConstant(C)) {
+ if (isSafeToDestroyConstant(C)) {
C->destroyConstant();
CleanupConstantGlobalUsers(V, Init, TD, TLI);
return true;
@@ -548,7 +354,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
static bool isSafeSROAElementUse(Value *V) {
// We might have a dead and dangling constant hanging off of here.
if (Constant *C = dyn_cast<Constant>(V))
- return SafeToDestroyConstant(C);
+ return isSafeToDestroyConstant(C);
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
@@ -1372,8 +1178,7 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
- StructType *ST =
- cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
+ StructType *ST = cast<StructType>(PN->getType()->getPointerElementType());
PHINode *NewPN =
PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
@@ -1504,7 +1309,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
if (StructType *ST = dyn_cast<StructType>(FieldTy))
TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
- Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(CI->getType());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems, 0,
@@ -1734,7 +1539,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
- Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(CI->getType());
unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
@@ -1916,13 +1721,12 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
if (!GV->hasLocalLinkage())
return false;
- SmallPtrSet<const PHINode*, 16> PHIUsers;
GlobalStatus GS;
- if (AnalyzeGlobal(GV, GS, PHIUsers))
+ if (GlobalStatus::analyzeGlobal(GV, GS))
return false;
- if (!GS.isCompared && !GV->hasUnnamedAddr()) {
+ if (!GS.IsCompared && !GV->hasUnnamedAddr()) {
GV->setUnnamedAddr(true);
NumUnnamed++;
}
@@ -1930,19 +1734,17 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
if (GV->isConstant() || !GV->hasInitializer())
return false;
- return ProcessInternalGlobal(GV, GVI, PHIUsers, GS);
+ return ProcessInternalGlobal(GV, GVI, GS);
}
/// ProcessInternalGlobal - Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
Module::global_iterator &GVI,
- const SmallPtrSet<const PHINode*, 16> &PHIUsers,
const GlobalStatus &GS) {
// If this is a first class global and has only one accessing function
- // and this function is main (which we know is not recursive we can make
- // this global a local variable) we replace the global with a local alloca
- // in this function.
+ // and this function is main (which we know is not recursive), we replace
+ // the global with a local alloca in this function.
//
// NOTE: It doesn't make sense to promote non single-value types since we
// are just replacing static memory to stack memory.
@@ -1971,7 +1773,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// If the global is never loaded (but may be stored to), it is dead.
// Delete it now.
- if (!GS.isLoaded) {
+ if (!GS.IsLoaded) {
DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
bool Changed;
@@ -1992,7 +1794,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
}
return Changed;
- } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
+ } else if (GS.StoredType <= GlobalStatus::InitializerStored) {
DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
GV->setConstant(true);
@@ -2015,7 +1817,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GVI = FirstNewGV; // Don't skip the newly produced globals!
return true;
}
- } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
+ } else if (GS.StoredType == GlobalStatus::StoredOnce) {
// If the initial value for the global was an undef value, and if only
// one other value was stored into it, we can just change the
// initializer to be the stored value, then delete all stores to the
@@ -2048,11 +1850,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Otherwise, if the global was not a boolean, we can shrink it to be a
// boolean.
- if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
- if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
- ++NumShrunkToBool;
- return true;
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
+ if (GS.Ordering == NotAtomic) {
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ ++NumShrunkToBool;
+ return true;
+ }
}
+ }
}
return false;
@@ -2210,8 +2015,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
CSVals[1] = 0;
StructType *StructTy =
- cast <StructType>(
- cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
+ cast<StructType>(GCL->getType()->getElementType()->getArrayElementType());
// Create the new init list.
std::vector<Constant*> CAList;
@@ -2784,7 +2588,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Value *Ptr = PtrArg->stripPointerCasts();
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
- if (!Size->isAllOnesValue() &&
+ if (TD && !Size->isAllOnesValue() &&
Size->getValue().getLimitedValue() >=
TD->getTypeStoreSize(ElemTy)) {
Invariants.insert(GV);
@@ -3041,107 +2845,148 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
return true;
}
-static Value::use_iterator getFirst(Value *V, SmallPtrSet<Use*, 8> &Tried) {
- for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
- Use *U = &I.getUse();
- if (Tried.count(U))
- continue;
-
- User *Usr = *I;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(Usr);
- if (!GV || !GV->hasName()) {
- Tried.insert(U);
- return I;
- }
-
- StringRef Name = GV->getName();
- if (Name != "llvm.used" && Name != "llvm.compiler_used") {
- Tried.insert(U);
- return I;
- }
- }
- return V->use_end();
+static int compareNames(Constant *const *A, Constant *const *B) {
+ return (*A)->getName().compare((*B)->getName());
}
-static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New);
-
-static bool replaceUsesOfWithOnConstant(ConstantArray *CA, Value *From,
- Value *ToV, Use *U) {
- Constant *To = cast<Constant>(ToV);
-
- SmallVector<Constant*, 8> NewOps;
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
- Constant *Op = CA->getOperand(i);
- NewOps.push_back(Op == From ? To : Op);
+static void setUsedInitializer(GlobalVariable &V,
+ SmallPtrSet<GlobalValue *, 8> Init) {
+ if (Init.empty()) {
+ V.eraseFromParent();
+ return;
}
- Constant *Replacement = ConstantArray::get(CA->getType(), NewOps);
- assert(Replacement != CA && "CA didn't contain From!");
+ SmallVector<llvm::Constant *, 8> UsedArray;
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext());
- bool Ret = replaceAllNonLLVMUsedUsesWith(CA, Replacement);
- if (Replacement->use_empty())
- Replacement->destroyConstant();
- if (CA->use_empty())
- CA->destroyConstant();
- return Ret;
+ for (SmallPtrSet<GlobalValue *, 8>::iterator I = Init.begin(), E = Init.end();
+ I != E; ++I) {
+ Constant *Cast = llvm::ConstantExpr::getBitCast(*I, Int8PtrTy);
+ UsedArray.push_back(Cast);
+ }
+ // Sort to get deterministic order.
+ array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
+ ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
+
+ Module *M = V.getParent();
+ V.removeFromParent();
+ GlobalVariable *NV =
+ new GlobalVariable(*M, ATy, false, llvm::GlobalValue::AppendingLinkage,
+ llvm::ConstantArray::get(ATy, UsedArray), "");
+ NV->takeName(&V);
+ NV->setSection("llvm.metadata");
+ delete &V;
}
-static bool replaceUsesOfWithOnConstant(ConstantExpr *CE, Value *From,
- Value *ToV, Use *U) {
- Constant *To = cast<Constant>(ToV);
- SmallVector<Constant*, 8> NewOps;
- for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
- Constant *Op = CE->getOperand(i);
- NewOps.push_back(Op == From ? To : Op);
+namespace {
+/// \brief An easy-to-access representation of llvm.used and llvm.compiler.used.
+class LLVMUsed {
+ SmallPtrSet<GlobalValue *, 8> Used;
+ SmallPtrSet<GlobalValue *, 8> CompilerUsed;
+ GlobalVariable *UsedV;
+ GlobalVariable *CompilerUsedV;
+
+public:
+ LLVMUsed(Module &M) {
+ UsedV = collectUsedGlobalVariables(M, Used, false);
+ CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true);
+ }
+ typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator;
+ iterator usedBegin() { return Used.begin(); }
+ iterator usedEnd() { return Used.end(); }
+ iterator compilerUsedBegin() { return CompilerUsed.begin(); }
+ iterator compilerUsedEnd() { return CompilerUsed.end(); }
+ bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
+ bool compilerUsedCount(GlobalValue *GV) const {
+ return CompilerUsed.count(GV);
+ }
+ bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
+ bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
+ bool usedInsert(GlobalValue *GV) { return Used.insert(GV); }
+ bool compilerUsedInsert(GlobalValue *GV) { return CompilerUsed.insert(GV); }
+
+ void syncVariablesAndSets() {
+ if (UsedV)
+ setUsedInitializer(*UsedV, Used);
+ if (CompilerUsedV)
+ setUsedInitializer(*CompilerUsedV, CompilerUsed);
}
+};
+}
- Constant *Replacement = CE->getWithOperands(NewOps);
- assert(Replacement != CE && "CE didn't contain From!");
+static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
+ if (GA.use_empty()) // No use at all.
+ return false;
- bool Ret = replaceAllNonLLVMUsedUsesWith(CE, Replacement);
- if (Replacement->use_empty())
- Replacement->destroyConstant();
- if (CE->use_empty())
- CE->destroyConstant();
- return Ret;
+ assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
+ "We should have removed the duplicated "
+ "element from llvm.compiler.used");
+ if (!GA.hasOneUse())
+ // Strictly more than one use. So at least one is not in llvm.used and
+ // llvm.compiler.used.
+ return true;
+
+ // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
+ return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
}
-static bool replaceUsesOfWithOnConstant(Constant *C, Value *From, Value *To,
- Use *U) {
- if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
- return replaceUsesOfWithOnConstant(CA, From, To, U);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- return replaceUsesOfWithOnConstant(CE, From, To, U);
- C->replaceUsesOfWithOnConstant(From, To, U);
- return true;
+static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
+ const LLVMUsed &U) {
+ unsigned N = 2;
+ assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
+ "We should have removed the duplicated "
+ "element from llvm.compiler.used");
+ if (U.usedCount(&V) || U.compilerUsedCount(&V))
+ ++N;
+ return V.hasNUsesOrMore(N);
}
-static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New) {
- SmallPtrSet<Use*, 8> Tried;
- bool Ret = false;
- for (;;) {
- Value::use_iterator I = getFirst(Old, Tried);
- if (I == Old->use_end())
- break;
- Use &U = I.getUse();
+static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
+ if (!GA.hasLocalLinkage())
+ return true;
- // Must handle Constants specially, we cannot call replaceUsesOfWith on a
- // constant because they are uniqued.
- if (Constant *C = dyn_cast<Constant>(U.getUser())) {
- if (!isa<GlobalValue>(C)) {
- Ret |= replaceUsesOfWithOnConstant(C, Old, New, &U);
- continue;
- }
- }
+ return U.usedCount(&GA) || U.compilerUsedCount(&GA);
+}
- U.set(New);
+static bool hasUsesToReplace(GlobalAlias &GA, LLVMUsed &U, bool &RenameTarget) {
+ RenameTarget = false;
+ bool Ret = false;
+ if (hasUseOtherThanLLVMUsed(GA, U))
Ret = true;
- }
- return Ret;
+
+ // If the alias is externally visible, we may still be able to simplify it.
+ if (!mayHaveOtherReferences(GA, U))
+ return Ret;
+
+ // If the aliasee has internal linkage, give it the name and linkage
+ // of the alias, and delete the alias. This turns:
+ // define internal ... @f(...)
+ // @a = alias ... @f
+ // into:
+ // define ... @a(...)
+ Constant *Aliasee = GA.getAliasee();
+ GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
+ if (!Target->hasLocalLinkage())
+ return Ret;
+
+ // Do not perform the transform if multiple aliases potentially target the
+ // aliasee. This check also ensures that it is safe to replace the section
+ // and other attributes of the aliasee with those of the alias.
+ if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
+ return Ret;
+
+ RenameTarget = true;
+ return true;
}
bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
bool Changed = false;
+ LLVMUsed Used(M);
+
+ for (SmallPtrSet<GlobalValue *, 8>::iterator I = Used.usedBegin(),
+ E = Used.usedEnd();
+ I != E; ++I)
+ Used.compilerUsedErase(*I);
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E;) {
@@ -3156,38 +3001,29 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
Constant *Aliasee = J->getAliasee();
GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
Target->removeDeadConstantUsers();
- bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();
// Make all users of the alias use the aliasee instead.
- if (replaceAllNonLLVMUsedUsesWith(J, Aliasee)) {
- ++NumAliasesResolved;
- Changed = true;
- }
- if (!J->use_empty())
+ bool RenameTarget;
+ if (!hasUsesToReplace(*J, Used, RenameTarget))
continue;
- // If the alias is externally visible, we may still be able to simplify it.
- if (!J->hasLocalLinkage()) {
- // If the aliasee has internal linkage, give it the name and linkage
- // of the alias, and delete the alias. This turns:
- // define internal ... @f(...)
- // @a = alias ... @f
- // into:
- // define ... @a(...)
- if (!Target->hasLocalLinkage())
- continue;
-
- // Do not perform the transform if multiple aliases potentially target the
- // aliasee. This check also ensures that it is safe to replace the section
- // and other attributes of the aliasee with those of the alias.
- if (!hasOneUse)
- continue;
+ J->replaceAllUsesWith(Aliasee);
+ ++NumAliasesResolved;
+ Changed = true;
+ if (RenameTarget) {
// Give the aliasee the name, linkage and other attributes of the alias.
Target->takeName(J);
Target->setLinkage(J->getLinkage());
Target->GlobalValue::copyAttributesFrom(J);
- }
+
+ if (Used.usedErase(J))
+ Used.usedInsert(Target);
+
+ if (Used.compilerUsedErase(J))
+ Used.compilerUsedInsert(Target);
+ } else if (mayHaveOtherReferences(*J, Used))
+ continue;
// Delete the alias.
M.getAliasList().erase(J);
@@ -3195,6 +3031,8 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
Changed = true;
}
+ Used.syncVariablesAndSets();
+
return Changed;
}
@@ -3323,8 +3161,6 @@ bool GlobalOpt::runOnModule(Module &M) {
// Try to find the llvm.globalctors list.
GlobalVariable *GlobalCtors = FindGlobalCtors(M);
- Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
-
bool LocalChange = true;
while (LocalChange) {
LocalChange = false;
@@ -3342,7 +3178,9 @@ bool GlobalOpt::runOnModule(Module &M) {
// Resolve aliases, when possible.
LocalChange |= OptimizeGlobalAliases(M);
- // Try to remove trivial global destructors.
+ // Try to remove trivial global destructors if they have not already been
+ // removed.
+ Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
if (CXAAtExitFn)
LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
index a0095da..437597e 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -63,7 +63,7 @@ public:
char AlwaysInliner::ID = 0;
INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
index a4f7026..57379a3 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
namespace {
-/// \brief Actaul inliner pass implementation.
+/// \brief Actual inliner pass implementation.
///
/// The common implementation of the inlining logic is shared between this
/// inliner pass and the always inliner pass. The two passes use different cost
@@ -61,7 +61,7 @@ public:
char SimpleInliner::ID = 0;
INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 663ddb7..d75d6ca 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -116,7 +116,8 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas,
- int InlineHistory, bool InsertLifetime) {
+ int InlineHistory, bool InsertLifetime,
+ const DataLayout *TD) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
@@ -189,6 +190,14 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
bool MergedAwayAlloca = false;
for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
AllocaInst *AvailableAlloca = AllocasForType[i];
+
+ unsigned Align1 = AI->getAlignment(),
+ Align2 = AvailableAlloca->getAlignment();
+ // If we don't have data layout information, and only one alloca is using
+ // the target default, then we can't safely merge them because we can't
+ // pick the greater alignment.
+ if (!TD && (!Align1 || !Align2) && Align1 != Align2)
+ continue;
// The available alloca has to be in the right function, not in some other
// function in this SCC.
@@ -206,6 +215,20 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
<< *AvailableAlloca << '\n');
AI->replaceAllUsesWith(AvailableAlloca);
+
+ if (Align1 != Align2) {
+ if (!Align1 || !Align2) {
+ assert(TD && "DataLayout required to compare default alignments");
+ unsigned TypeAlign = TD->getABITypeAlignment(AI->getAllocatedType());
+
+ Align1 = Align1 ? Align1 : TypeAlign;
+ Align2 = Align2 ? Align2 : TypeAlign;
+ }
+
+ if (Align1 > Align2)
+ AvailableAlloca->setAlignment(AI->getAlignment());
+ }
+
AI->eraseFromParent();
MergedAwayAlloca = true;
++NumMergedAllocas;
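The new alignment handling has two parts: refuse to merge when the comparison is meaningless, and otherwise keep the stricter alignment on the surviving alloca. A standalone mirror of that rule, where ABIAlign stands in for DataLayout::getABITypeAlignment (a sketch, not the pass's code):

    // Returns false when the two allocas must not be merged; otherwise sets
    // Merged to the alignment the surviving alloca should carry.
    static bool mergeAlignments(unsigned Align1, unsigned Align2,
                                bool HaveDataLayout, unsigned ABIAlign,
                                unsigned &Merged) {
      if (!Align1 && !Align2) {      // both use the target default
        Merged = 0;
        return true;
      }
      // 0 means "target default", which is only comparable against an explicit
      // alignment when DataLayout can tell us what the default actually is.
      if (!HaveDataLayout && (!Align1 || !Align2))
        return false;
      if (!Align1) Align1 = ABIAlign;  // resolve defaults via DataLayout
      if (!Align2) Align2 = ABIAlign;
      Merged = Align1 > Align2 ? Align1 : Align2;
      return true;
    }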
@@ -482,7 +505,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// Attempt to inline the function.
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
- InlineHistoryID, InsertLifetime))
+ InlineHistoryID, InsertLifetime, TD))
continue;
++NumInlined;
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
index 4bfab5b..64e2ced 100644
--- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -11,10 +11,17 @@
// If the function or variable is not in the list of external names given to
// the pass it is marked as internal.
//
+// This transformation would not be legal in a regular compilation, but it gets
+// extra information from the linker about what is safe.
+//
+// For example, it is only safe to internalize a function with external
+// linkage if the linker tells us the function is used solely within this
+// module.
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "internalize"
#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Module.h"
@@ -22,6 +29,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <fstream>
#include <set>
using namespace llvm;
@@ -48,10 +57,8 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit InternalizePass();
- explicit InternalizePass(ArrayRef<const char *> exportList);
+ explicit InternalizePass(ArrayRef<const char *> ExportList);
void LoadFile(const char *Filename);
- void ClearExportList();
- void AddToExportList(const std::string &val);
virtual bool runOnModule(Module &M);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -70,15 +77,14 @@ InternalizePass::InternalizePass()
initializeInternalizePassPass(*PassRegistry::getPassRegistry());
if (!APIFile.empty()) // If a filename is specified, use it.
LoadFile(APIFile.c_str());
- if (!APIList.empty()) // If a list is specified, use it as well.
- ExternalNames.insert(APIList.begin(), APIList.end());
+ ExternalNames.insert(APIList.begin(), APIList.end());
}
-InternalizePass::InternalizePass(ArrayRef<const char *> exportList)
+InternalizePass::InternalizePass(ArrayRef<const char *> ExportList)
: ModulePass(ID){
initializeInternalizePassPass(*PassRegistry::getPassRegistry());
- for(ArrayRef<const char *>::const_iterator itr = exportList.begin();
- itr != exportList.end(); itr++) {
+ for(ArrayRef<const char *>::const_iterator itr = ExportList.begin();
+ itr != ExportList.end(); itr++) {
ExternalNames.insert(*itr);
}
}
@@ -99,12 +105,25 @@ void InternalizePass::LoadFile(const char *Filename) {
}
}
-void InternalizePass::ClearExportList() {
- ExternalNames.clear();
-}
+static bool shouldInternalize(const GlobalValue &GV,
+ const std::set<std::string> &ExternalNames) {
+ // The global value must be defined here
+ if (GV.isDeclaration())
+ return false;
+
+ // Available externally is really just a "declaration with a body".
+ if (GV.hasAvailableExternallyLinkage())
+ return false;
+
+ // Already has internal linkage
+ if (GV.hasLocalLinkage())
+ return false;
+
+ // Marked to keep external?
+ if (ExternalNames.count(GV.getName()))
+ return false;
-void InternalizePass::AddToExportList(const std::string &val) {
- ExternalNames.insert(val);
+ return true;
}
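The four early-outs above fold the previously duplicated inline conditions into one reusable predicate. A standalone rendering over a plain struct (the struct is illustrative, not LLVM's GlobalValue API):

    #include <set>
    #include <string>

    struct GlobalInfo {
      bool IsDeclaration;         // defined elsewhere
      bool AvailableExternally;   // "declaration with a body"
      bool HasLocalLinkage;       // already internal
      std::string Name;
    };

    static bool shouldInternalize(const GlobalInfo &GV,
                                  const std::set<std::string> &ExternalNames) {
      if (GV.IsDeclaration)       return false;
      if (GV.AvailableExternally) return false;
      if (GV.HasLocalLinkage)     return false;
      // Not explicitly marked to keep external.
      return ExternalNames.count(GV.Name) == 0;
    }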
bool InternalizePass::runOnModule(Module &M) {
@@ -112,26 +131,40 @@ bool InternalizePass::runOnModule(Module &M) {
CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
bool Changed = false;
- // Never internalize functions which code-gen might insert.
- // FIXME: We should probably add this (and the __stack_chk_guard) via some
- // type of call-back in CodeGen.
- ExternalNames.insert("__stack_chk_fail");
+ SmallPtrSet<GlobalValue *, 8> Used;
+ collectUsedGlobalVariables(M, Used, false);
+
+ // We must assume that globals in llvm.used have a reference that not even
+ // the linker can see, so we don't internalize them.
+ // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
+ // linker can drop those symbols. If this pass is running as part of LTO,
+ // one might think that it could just drop llvm.compiler.used. The problem
+ // is that even in LTO llvm doesn't see every reference. For example,
+ // we don't see references from function local inline assembly. To be
+ // conservative, we internalize symbols in llvm.compiler.used, but we
+ // keep llvm.compiler.used so that the symbol is not deleted by llvm.
+ for (SmallPtrSet<GlobalValue *, 8>::iterator I = Used.begin(), E = Used.end();
+ I != E; ++I) {
+ GlobalValue *V = *I;
+ ExternalNames.insert(V->getName());
+ }
// Mark all functions not in the api as internal.
// FIXME: maybe use private linkage?
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration() && // Function must be defined here
- // Available externally is really just a "declaration with a body".
- !I->hasAvailableExternallyLinkage() &&
- !I->hasLocalLinkage() && // Can't already have internal linkage
- !ExternalNames.count(I->getName())) {// Not marked to keep external?
- I->setLinkage(GlobalValue::InternalLinkage);
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!shouldInternalize(*I, ExternalNames))
+ continue;
+
+ I->setLinkage(GlobalValue::InternalLinkage);
+
+ if (ExternalNode)
// Remove a callgraph edge from the external node to this function.
- if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
- Changed = true;
- ++NumFunctions;
- DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
- }
+ ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
+
+ Changed = true;
+ ++NumFunctions;
+ DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
+ }
// Never internalize the llvm.used symbol. It is used to implement
// attribute((used)).
@@ -146,35 +179,36 @@ bool InternalizePass::runOnModule(Module &M) {
ExternalNames.insert("llvm.global.annotations");
// Never internalize symbols code-gen inserts.
+ // FIXME: We should probably add this (and the __stack_chk_guard) via some
+ // type of call-back in CodeGen.
+ ExternalNames.insert("__stack_chk_fail");
ExternalNames.insert("__stack_chk_guard");
// Mark all global variables with initializers that are not in the api as
// internal as well.
// FIXME: maybe use private linkage?
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (!I->isDeclaration() && !I->hasLocalLinkage() &&
- // Available externally is really just a "declaration with a body".
- !I->hasAvailableExternallyLinkage() &&
- !ExternalNames.count(I->getName())) {
- I->setLinkage(GlobalValue::InternalLinkage);
- Changed = true;
- ++NumGlobals;
- DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
- }
+ I != E; ++I) {
+ if (!shouldInternalize(*I, ExternalNames))
+ continue;
+
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumGlobals;
+ DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
+ }
// Mark all aliases that are not in the api as internal as well.
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E; ++I)
- if (!I->isDeclaration() && !I->hasInternalLinkage() &&
- // Available externally is really just a "declaration with a body".
- !I->hasAvailableExternallyLinkage() &&
- !ExternalNames.count(I->getName())) {
- I->setLinkage(GlobalValue::InternalLinkage);
- Changed = true;
- ++NumAliases;
- DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
- }
+ I != E; ++I) {
+ if (!shouldInternalize(*I, ExternalNames))
+ continue;
+
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumAliases;
+ DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
+ }
return Changed;
}
@@ -183,6 +217,6 @@ ModulePass *llvm::createInternalizePass() {
return new InternalizePass();
}
-ModulePass *llvm::createInternalizePass(ArrayRef<const char *> el) {
- return new InternalizePass(el);
+ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList) {
+ return new InternalizePass(ExportList);
}
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 4ce749c..3861421 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -210,16 +210,20 @@ private:
// Any two pointers in the same address space are equivalent, intptr_t and
// pointers are equivalent. Otherwise, standard type equivalence rules apply.
bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
+
+ PointerType *PTy1 = dyn_cast<PointerType>(Ty1);
+ PointerType *PTy2 = dyn_cast<PointerType>(Ty2);
+
+ if (TD) {
+ if (PTy1 && PTy1->getAddressSpace() == 0) Ty1 = TD->getIntPtrType(Ty1);
+ if (PTy2 && PTy2->getAddressSpace() == 0) Ty2 = TD->getIntPtrType(Ty2);
+ }
+
if (Ty1 == Ty2)
return true;
- if (Ty1->getTypeID() != Ty2->getTypeID()) {
- if (TD) {
- LLVMContext &Ctx = Ty1->getContext();
- if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true;
- if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true;
- }
+
+ if (Ty1->getTypeID() != Ty2->getTypeID())
return false;
- }
switch (Ty1->getTypeID()) {
default:
@@ -241,8 +245,7 @@ bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
return true;
case Type::PointerTyID: {
- PointerType *PTy1 = cast<PointerType>(Ty1);
- PointerType *PTy2 = cast<PointerType>(Ty2);
+ assert(PTy1 && PTy2 && "Both types must be pointers here.");
return PTy1->getAddressSpace() == PTy2->getAddressSpace();
}
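The restructured check canonicalizes first and compares second: with target data, an address-space-0 pointer collapses to the target's intptr type before any comparison happens. A standalone model of that canonicalize-then-compare shape (the TypeKey enum is illustrative):

    #include <cassert>

    enum TypeKey { I32, I64, PtrAS0, PtrAS1 };

    // With target data, an address-space-0 pointer is interchangeable with the
    // pointer-sized integer (IntPtr); other address spaces are left alone.
    static TypeKey canonicalize(TypeKey T, bool HaveTD, TypeKey IntPtr) {
      return (HaveTD && T == PtrAS0) ? IntPtr : T;
    }

    static bool isEquivalentType(TypeKey A, TypeKey B, bool HaveTD, TypeKey IntPtr) {
      return canonicalize(A, HaveTD, IntPtr) == canonicalize(B, HaveTD, IntPtr);
    }

    int main() {
      assert(isEquivalentType(PtrAS0, I64, true, I64));   // merged on 64-bit targets
      assert(!isEquivalentType(PtrAS0, I64, false, I64)); // but not without DataLayout
      assert(!isEquivalentType(PtrAS1, I64, true, I64));  // non-zero AS stays a pointer
    }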
@@ -352,14 +355,19 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
// Determine whether two GEP operations perform the same underlying arithmetic.
bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
const GEPOperator *GEP2) {
- // When we have target data, we can reduce the GEP down to the value in bytes
- // added to the address.
- unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 1;
- APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0);
- if (TD &&
- GEP1->accumulateConstantOffset(*TD, Offset1) &&
- GEP2->accumulateConstantOffset(*TD, Offset2)) {
- return Offset1 == Offset2;
+ unsigned AS = GEP1->getPointerAddressSpace();
+ if (AS != GEP2->getPointerAddressSpace())
+ return false;
+
+ if (TD) {
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits(AS) : 1;
+ APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0);
+ if (GEP1->accumulateConstantOffset(*TD, Offset1) &&
+ GEP2->accumulateConstantOffset(*TD, Offset2)) {
+ return Offset1 == Offset2;
+ }
}
if (GEP1->getPointerOperand()->getType() !=
@@ -713,6 +721,19 @@ void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
writeThunk(F, G);
}
+// Helper for writeThunk: selects the proper cast operation, but is a bit
+// simpler than CastInst::getCastOpcode.
+static Value* createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
+ Type *SrcTy = V->getType();
+ if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
+ return Builder.CreateIntToPtr(V, DestTy);
+ else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
+ return Builder.CreatePtrToInt(V, DestTy);
+ else
+ return Builder.CreateBitCast(V, DestTy);
+}
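The three-way choice matters because a plain bitcast between integer and pointer types is invalid IR. A standalone mirror of the dispatch, stripped of the IRBuilder (enum names are illustrative):

    enum TypeClass { IntClass, PtrClass, OtherClass };
    enum CastOp    { IntToPtr, PtrToInt, BitCast };

    // Same selection createCast performs via IRBuilder above.
    static CastOp chooseCast(TypeClass Src, TypeClass Dst) {
      if (Src == IntClass && Dst == PtrClass) return IntToPtr;
      if (Src == PtrClass && Dst == IntClass) return PtrToInt;
      return BitCast;  // everything else handled here is bitcast-compatible
    }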
+
// Replace G with a simple tail call to bitcast(F). Also replace direct uses
// of G with bitcast(F). Deletes G.
void MergeFunctions::writeThunk(Function *F, Function *G) {
@@ -738,7 +759,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
FunctionType *FFTy = F->getFunctionType();
for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
AI != AE; ++AI) {
- Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i)));
+ Args.push_back(createCast(Builder, (Value*)AI, FFTy->getParamType(i)));
++i;
}
@@ -748,13 +769,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
if (NewG->getReturnType()->isVoidTy()) {
Builder.CreateRetVoid();
} else {
- Type *RetTy = NewG->getReturnType();
- if (CI->getType()->isIntegerTy() && RetTy->isPointerTy())
- Builder.CreateRet(Builder.CreateIntToPtr(CI, RetTy));
- else if (CI->getType()->isPointerTy() && RetTy->isIntegerTy())
- Builder.CreateRet(Builder.CreatePtrToInt(CI, RetTy));
- else
- Builder.CreateRet(Builder.CreateBitCast(CI, RetTy));
+ Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType()));
}
NewG->copyAttributesFrom(G);
@@ -829,6 +844,18 @@ bool MergeFunctions::insert(ComparableFunction &NewF) {
const ComparableFunction &OldF = *Result.first;
+ // Don't merge tiny functions, since it can just end up making the function
+ // larger.
+ // FIXME: Should still merge them if they are unnamed_addr and produce an
+ // alias.
+ if (NewF.getFunc()->size() == 1) {
+ if (NewF.getFunc()->front().size() <= 2) {
+ DEBUG(dbgs() << NewF.getFunc()->getName()
+ << " is to small to bother merging\n");
+ return false;
+ }
+ }
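The threshold is deliberately crude: a single basic block holding at most two instructions. A standalone rendering (FnShape is an illustrative stand-in for Function):

    struct FnShape {
      unsigned NumBlocks;        // Function::size()
      unsigned EntryBlockInsts;  // front().size()
    };

    // Mirrors the early-out above: merging such a function would cost more
    // (a thunk plus a call) than it saves.
    static bool tooSmallToMerge(const FnShape &F) {
      return F.NumBlocks == 1 && F.EntryBlockInsts <= 2;
    }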
+
// Never thunk a strong function to a weak function.
assert(!OldF.getFunc()->mayBeOverridden() ||
NewF.getFunc()->mayBeOverridden());
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 986c0b8..24c5018 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -29,15 +29,20 @@
using namespace llvm;
static cl::opt<bool>
-RunLoopVectorization("vectorize-loops",
+RunLoopVectorization("vectorize-loops", cl::Hidden,
cl::desc("Run the Loop vectorization passes"));
static cl::opt<bool>
-RunSLPVectorization("vectorize-slp",
+LateVectorization("late-vectorize", cl::init(true), cl::Hidden,
+ cl::desc("Run the vectorization pasess late in the pass "
+ "pipeline (after the inliner)"));
+
+static cl::opt<bool>
+RunSLPVectorization("vectorize-slp", cl::Hidden,
cl::desc("Run the SLP vectorization passes"));
static cl::opt<bool>
-RunBBVectorization("vectorize-slp-aggressive",
+RunBBVectorization("vectorize-slp-aggressive", cl::Hidden,
cl::desc("Run the BB vectorization passes"));
static cl::opt<bool>
@@ -49,17 +54,22 @@ static cl::opt<bool> UseNewSROA("use-new-sroa",
cl::init(true), cl::Hidden,
cl::desc("Enable the new, experimental SROA pass"));
+static cl::opt<bool>
+RunLoopRerolling("reroll-loops", cl::Hidden,
+ cl::desc("Run the loop rerolling pass"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
LibraryInfo = 0;
Inliner = 0;
- DisableSimplifyLibCalls = false;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
BBVectorize = RunBBVectorization;
SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
+ LateVectorize = LateVectorization;
+ RerollLoops = RunLoopRerolling;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -174,8 +184,6 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
else
MPM.add(createScalarReplAggregatesPass(-1, false));
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
- if (!DisableSimplifyLibCalls)
- MPM.add(createSimplifyLibCallsPass()); // Library Call Optimizations
MPM.add(createJumpThreadingPass()); // Thread jumps.
MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
@@ -192,8 +200,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
- if (LoopVectorize && OptLevel > 2)
- MPM.add(createLoopVectorizePass());
+ if (!LateVectorize && LoopVectorize)
+ MPM.add(createLoopVectorizePass(DisableUnrollLoops));
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass()); // Unroll small loops
@@ -213,16 +221,18 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
+ if (RerollLoops)
+ MPM.add(createLoopRerollPass());
if (SLPVectorize)
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
if (BBVectorize) {
MPM.add(createBBVectorizePass());
MPM.add(createInstructionCombiningPass());
if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(createGVNPass()); // Remove redundancies
+ MPM.add(createGVNPass()); // Remove redundancies
else
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
// BBVectorize may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
@@ -230,9 +240,25 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
}
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Clean up after everything.
+ // As an experimental mode, run any vectorization passes in a separate
+ // pipeline from the CGSCC pass manager that runs iteratively with the
+ // inliner.
+ if (LateVectorize && LoopVectorize) {
+ // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
+ // pass manager that we are specifically trying to avoid. To prevent this
+ // we must insert a no-op module pass to reset the pass manager.
+ MPM.add(createBarrierNoopPass());
+
+ // Add the various vectorization passes and relevant cleanup passes for
+ // them since we are no longer in the middle of the main scalar pipeline.
+ MPM.add(createLoopVectorizePass(DisableUnrollLoops));
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createCFGSimplificationPass());
+ }
+
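For callers the switch is invisible: the same builder flags drive either placement, and LateVectorize only changes where the loop vectorizer lands in the pipeline. A sketch of driving this, assuming the 3.4-era headers and the public fields this diff itself touches (hedged; not copied from the patch):

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    static void buildO2Pipeline(PassManager &MPM) {
      PassManagerBuilder PMB;
      PMB.OptLevel = 2;
      PMB.LoopVectorize = true;  // placement chosen by LateVectorize internally
      PMB.SLPVectorize = true;
      PMB.populateModulePassManager(MPM);
    }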
if (!DisableUnitAtATime) {
// FIXME: We shouldn't bother with this anymore.
MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
@@ -257,11 +283,8 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// Now that composite has been compiled, scan through the module, looking
// for a main function. If main is defined, mark all other functions
// internal.
- if (Internalize) {
- std::vector<const char*> E;
- E.push_back("main");
- PM.add(createInternalizePass(E));
- }
+ if (Internalize)
+ PM.add(createInternalizePass("main"));
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
@@ -302,6 +325,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// The IPO passes may leave cruft around. Clean up after them.
PM.add(createInstructionCombiningPass());
PM.add(createJumpThreadingPass());
+
// Break up allocas
if (UseNewSROA)
PM.add(createSROAPass());
@@ -315,6 +339,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createLICMPass()); // Hoist loop invariants.
PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
+
// Nuke dead stores.
PM.add(createDeadStoreEliminationPass());
@@ -379,8 +404,7 @@ LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
void
LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
LLVMBool Value) {
- PassManagerBuilder *Builder = unwrap(PMB);
- Builder->DisableSimplifyLibCalls = Value;
+ // NOTE: The simplify-libcalls pass has been removed.
}
void
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index 73d9323..b160913 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -51,7 +51,7 @@ namespace {
char PruneEH::ID = 0;
INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
"Remove unused exception handling info", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_END(PruneEH, "prune-eh",
"Remove unused exception handling info", false, false)
@@ -145,15 +145,13 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
NewAttributes.addAttribute(Attribute::NoReturn);
Function *F = (*I)->getFunction();
- const AttributeSet &PAL = F->getAttributes();
- const AttributeSet &NPAL =
- PAL.addAttributes(F->getContext(), AttributeSet::FunctionIndex,
- AttributeSet::get(F->getContext(),
- AttributeSet::FunctionIndex,
- NewAttributes));
+ const AttributeSet &PAL = F->getAttributes().getFnAttributes();
+ const AttributeSet &NPAL = AttributeSet::get(
+ F->getContext(), AttributeSet::FunctionIndex, NewAttributes);
+
if (PAL != NPAL) {
MadeChange = true;
- F->setAttributes(NPAL);
+ F->addAttributes(AttributeSet::FunctionIndex, NPAL);
}
}
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 3396f79..c4f5cfc 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -9,7 +9,7 @@
//
// The StripSymbols transformation implements code stripping. Specifically, it
// can delete:
-//
+//
// * names for virtual registers
// * symbols for internal globals and functions
// * debug information
@@ -39,7 +39,7 @@ namespace {
bool OnlyDebugInfo;
public:
static char ID; // Pass identification, replacement for typeid
- explicit StripSymbols(bool ODI = false)
+ explicit StripSymbols(bool ODI = false)
: ModulePass(ID), OnlyDebugInfo(ODI) {
initializeStripSymbolsPass(*PassRegistry::getPassRegistry());
}
@@ -144,7 +144,7 @@ static void RemoveDeadConstant(Constant *C) {
assert(C->use_empty() && "Constant is not dead!");
SmallPtrSet<Constant*, 4> Operands;
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
- if (OnlyUsedBy(C->getOperand(i), C))
+ if (OnlyUsedBy(C->getOperand(i), C))
Operands.insert(cast<Constant>(C->getOperand(i)));
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
if (!GV->hasLocalLinkage()) return; // Don't delete non static globals.
@@ -182,7 +182,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
StructType *STy = StructTypes[i];
if (STy->isLiteral() || STy->getName().empty()) continue;
-
+
if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
continue;
@@ -199,7 +199,7 @@ static void findUsedValues(GlobalVariable *LLVMUsed,
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
- if (GlobalValue *GV =
+ if (GlobalValue *GV =
dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
UsedValues.insert(GV);
}
@@ -217,71 +217,20 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
I->setName(""); // Internal symbols can't participate in linkage
}
-
+
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
I->setName(""); // Internal symbols can't participate in linkage
StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
}
-
+
// Remove all names from types.
StripTypeNames(M, PreserveDbgInfo);
return true;
}
-// StripDebugInfo - Strip debug info in the module if it exists.
-// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and
-// llvm.dbg.region.end calls, and any globals they point to if now dead.
-static bool StripDebugInfo(Module &M) {
-
- bool Changed = false;
-
- // Remove all of the calls to the debugger intrinsics, and remove them from
- // the module.
- if (Function *Declare = M.getFunction("llvm.dbg.declare")) {
- while (!Declare->use_empty()) {
- CallInst *CI = cast<CallInst>(Declare->use_back());
- CI->eraseFromParent();
- }
- Declare->eraseFromParent();
- Changed = true;
- }
-
- if (Function *DbgVal = M.getFunction("llvm.dbg.value")) {
- while (!DbgVal->use_empty()) {
- CallInst *CI = cast<CallInst>(DbgVal->use_back());
- CI->eraseFromParent();
- }
- DbgVal->eraseFromParent();
- Changed = true;
- }
-
- for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
- NME = M.named_metadata_end(); NMI != NME;) {
- NamedMDNode *NMD = NMI;
- ++NMI;
- if (NMD->getName().startswith("llvm.dbg.")) {
- NMD->eraseFromParent();
- Changed = true;
- }
- }
-
- for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
- for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
- ++FI)
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
- ++BI) {
- if (!BI->getDebugLoc().isUnknown()) {
- Changed = true;
- BI->setDebugLoc(DebugLoc());
- }
- }
-
- return Changed;
-}
-
bool StripSymbols::runOnModule(Module &M) {
bool Changed = false;
Changed |= StripDebugInfo(M);
@@ -307,13 +256,13 @@ bool StripDebugDeclare::runOnModule(Module &M) {
assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
CI->eraseFromParent();
if (Arg1->use_empty()) {
- if (Constant *C = dyn_cast<Constant>(Arg1))
+ if (Constant *C = dyn_cast<Constant>(Arg1))
DeadConstants.push_back(C);
- else
+ else
RecursivelyDeleteTriviallyDeadInstructions(Arg1);
}
if (Arg2->use_empty())
- if (Constant *C = dyn_cast<Constant>(Arg2))
+ if (Constant *C = dyn_cast<Constant>(Arg2))
DeadConstants.push_back(C);
}
Declare->eraseFromParent();
@@ -332,81 +281,107 @@ bool StripDebugDeclare::runOnModule(Module &M) {
return true;
}
-/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
-/// printer to not emit usual symbol prefix before the symbol name is used then
-/// return linkage name after skipping this special LLVM prefix.
-static StringRef getRealLinkageName(StringRef LinkageName) {
- char One = '\1';
- if (LinkageName.startswith(StringRef(&One, 1)))
- return LinkageName.substr(1);
- return LinkageName;
-}
-
+/// Remove any debug info for global variables/functions in the given module for
+/// which said global variable/function no longer exists (i.e. is null).
+///
+/// Debugging information is encoded in LLVM IR using metadata. This is
+/// designed in such a way that debug info for symbols is preserved even if
+/// the symbols themselves are optimized away by the optimizer. This special
+/// pass removes debug info for such symbols.
bool StripDeadDebugInfo::runOnModule(Module &M) {
bool Changed = false;
- // Debugging infomration is encoded in llvm IR using metadata. This is designed
- // such a way that debug info for symbols preserved even if symbols are
- // optimized away by the optimizer. This special pass removes debug info for
- // such symbols.
-
- // llvm.dbg.gv keeps track of debug info for global variables.
- if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
- SmallVector<MDNode *, 8> MDs;
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
- if (DIGlobalVariable(NMD->getOperand(i)).Verify())
- MDs.push_back(NMD->getOperand(i));
- else
- Changed = true;
- NMD->eraseFromParent();
- NMD = NULL;
-
- for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
- E = MDs.end(); I != E; ++I) {
- GlobalVariable *GV = DIGlobalVariable(*I).getGlobal();
- if (GV && M.getGlobalVariable(GV->getName(), true)) {
- if (!NMD)
- NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
- NMD->addOperand(*I);
- }
+ LLVMContext &C = M.getContext();
+
+ // Find all debug info in M. This is actually overkill in terms of what we
+ // want to do, but we want to try to be as resilient as possible in the face
+ // of potential debug info changes by using the formal interfaces given to
+ // us as much as possible.
+ DebugInfoFinder F;
+ F.processModule(M);
+
+ // For each compile unit, find the live set of global variables/functions and
+ // replace the current list of potentially dead global variables/functions
+ // with the live list.
+ SmallVector<Value *, 64> LiveGlobalVariables;
+ SmallVector<Value *, 64> LiveSubprograms;
+ DenseSet<const MDNode *> VisitedSet;
+
+ for (DebugInfoFinder::iterator CI = F.compile_unit_begin(),
+ CE = F.compile_unit_end(); CI != CE; ++CI) {
+ // Create our compile unit.
+ DICompileUnit DIC(*CI);
+ assert(DIC.Verify() && "DIC must verify as a DICompileUnit.");
+
+ // Create our live subprogram list.
+ DIArray SPs = DIC.getSubprograms();
+ bool SubprogramChange = false;
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram DISP(SPs.getElement(i));
+ assert(DISP.Verify() && "DISP must verify as a DISubprogram.");
+
+ // Make sure we visit each subprogram only once.
+ if (!VisitedSet.insert(DISP).second)
+ continue;
+
+ // If the function referenced by DISP is not null, the function is live.
+ if (DISP.getFunction())
+ LiveSubprograms.push_back(DISP);
else
- Changed = true;
+ SubprogramChange = true;
}
- }
- // llvm.dbg.sp keeps track of debug info for subprograms.
- if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) {
- SmallVector<MDNode *, 8> MDs;
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
- if (DISubprogram(NMD->getOperand(i)).Verify())
- MDs.push_back(NMD->getOperand(i));
+ // Create our live global variable list.
+ DIArray GVs = DIC.getGlobalVariables();
+ bool GlobalVariableChange = false;
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
+ DIGlobalVariable DIG(GVs.getElement(i));
+ assert(DIG.Verify() && "DIG must verify as DIGlobalVariable.");
+
+ // Make sure we visit each global variable only once.
+ if (!VisitedSet.insert(DIG).second)
+ continue;
+
+ // If the global variable referenced by DIG is not null, the global
+ // variable is live.
+ if (DIG.getGlobal())
+ LiveGlobalVariables.push_back(DIG);
else
- Changed = true;
- NMD->eraseFromParent();
- NMD = NULL;
-
- for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
- E = MDs.end(); I != E; ++I) {
- bool FnIsLive = false;
- if (Function *F = DISubprogram(*I).getFunction())
- if (M.getFunction(F->getName()))
- FnIsLive = true;
- if (FnIsLive) {
- if (!NMD)
- NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
- NMD->addOperand(*I);
- } else {
- // Remove llvm.dbg.lv.fnname named mdnode which may have been used
- // to hold debug info for dead function's local variables.
- StringRef FName = DISubprogram(*I).getLinkageName();
- if (FName.empty())
- FName = DISubprogram(*I).getName();
- if (NamedMDNode *LVNMD =
- M.getNamedMetadata(Twine("llvm.dbg.lv.",
- getRealLinkageName(FName))))
- LVNMD->eraseFromParent();
- }
+ GlobalVariableChange = true;
+ }
+
+ // If we found dead subprograms or global variables, replace the current
+ // subprogram list/global variable list with our new live subprogram/global
+ // variable list.
+ if (SubprogramChange) {
+ // Make sure that 9 is still the index of the subprograms. The assert
+ // below fires if the location of the subprogram array ever changes, so
+ // that this code gets updated when such an event occurs.
+ assert(DIC->getNumOperands() >= 10 &&
+ SPs == DIC->getOperand(9) &&
+ "DICompileUnits is expected to store Subprograms in operand "
+ "9.");
+ DIC->replaceOperandWith(9, MDNode::get(C, LiveSubprograms));
+ Changed = true;
}
+
+ if (GlobalVariableChange) {
+ // Make sure that 10 is still the index of the global variables. The
+ // assert below fires if the location of the global variable array ever
+ // changes, so that this code gets updated when such an event occurs.
+ assert(DIC->getNumOperands() >= 11 &&
+ GVs == DIC->getOperand(10) &&
+ "DICompileUnits is expected to store Global Variables in operand "
+ "10.");
+ DIC->replaceOperandWith(10, MDNode::get(C, LiveGlobalVariables));
+ Changed = true;
+ }
+
+ // Reset lists for the next iteration.
+ LiveSubprograms.clear();
+ LiveGlobalVariables.clear();
}
return Changed;
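
The pruning loop above is a filter-then-replace pattern: collect the live entries, and only write the list back (reporting a change) when something was actually dropped. A minimal sketch of that control flow with plain containers standing in for the DebugInfo API:

    #include <utility>
    #include <vector>

    struct Entry { bool Live; };

    // Mirrors SubprogramChange/GlobalVariableChange above: returns true only
    // when at least one dead entry was pruned, i.e. when the caller would
    // call replaceOperandWith() and set Changed.
    static bool pruneDead(std::vector<Entry> &List) {
      std::vector<Entry> LiveEntries;
      for (const Entry &E : List)
        if (E.Live)
          LiveEntries.push_back(E);
      if (LiveEntries.size() == List.size())
        return false; // nothing dead: leave the operand untouched
      List = std::move(LiveEntries);
      return true;
    }
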
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
index 2a36074..a5eddc2 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
@@ -1,4 +1,4 @@
-//===- InstCombine.h - Main InstCombine pass definition -------------------===//
+//===- InstCombine.h - Main InstCombine pass definition ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -158,8 +158,8 @@ public:
ConstantInt *DivRHS);
Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI,
ConstantInt *DivRHS);
- Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI,
- ICmpInst::Predicate Pred, Value *TheAdd);
+ Instruction *FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred);
Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond, Instruction &I);
Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
@@ -178,6 +178,7 @@ public:
Instruction *visitPtrToInt(PtrToIntInst &CI);
Instruction *visitIntToPtr(IntToPtrInst &CI);
Instruction *visitBitCast(BitCastInst &CI);
+ Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
Instruction *FI);
Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
@@ -212,8 +213,8 @@ private:
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const;
- Type *FindElementAtOffset(Type *Ty, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices);
+ Type *FindElementAtOffset(Type *PtrTy, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
@@ -234,6 +235,7 @@ private:
bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
Value *EmitGEPOffset(User *GEP);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
+ Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
public:
// InsertNewInstBefore - insert an instruction New before instruction Old
@@ -270,7 +272,7 @@ public:
if (&I == V)
V = UndefValue::get(I.getType());
- DEBUG(errs() << "IC: Replacing " << I << "\n"
+ DEBUG(dbgs() << "IC: Replacing " << I << "\n"
" with " << *V << '\n');
I.replaceAllUsesWith(V);
@@ -282,7 +284,7 @@ public:
// instruction. Instead, visit methods should return the value returned by
// this function.
Instruction *EraseInstFromFunction(Instruction &I) {
- DEBUG(errs() << "IC: ERASE " << I << '\n');
+ DEBUG(dbgs() << "IC: ERASE " << I << '\n');
assert(I.use_empty() && "Cannot erase instruction that is used!");
// Make sure that we reprocess all operands now that we reduced their
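
The errs()-to-dbgs() switches in this header follow LLVM's debug-output convention; a minimal sketch of that convention, assuming a translation unit that defines its own DEBUG_TYPE:

    #define DEBUG_TYPE "instcombine"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    static void traceExample() {
      // Unlike errs(), dbgs() is filtered by -debug/-debug-only=instcombine,
      // and the whole statement compiles away in NDEBUG builds.
      DEBUG(dbgs() << "IC: example trace message\n");
    }
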
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 166f8df..534feb8 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
@@ -488,7 +489,7 @@ Value *FAddCombine::performFactorization(Instruction *I) {
createFSub(AddSub0, AddSub1);
if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
const APFloat &F = CFP->getValueAPF();
- if (!F.isNormal() || F.isDenormal())
+ if (!F.isNormal())
return 0;
}
@@ -659,7 +660,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
}
}
- assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
+ assert((NextTmpIdx <= array_lengthof(TmpResult) + 1) &&
"out-of-bound access");
if (ConstAdd)
@@ -876,7 +877,7 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
uint32_t CSTVal = CST->getLimitedValue(BitWidth);
CST = ConstantInt::get(V->getType()->getContext(),
- APInt(BitWidth, 1).shl(CSTVal));
+ APInt::getOneBitSet(BitWidth, CSTVal));
return I->getOperand(0);
}
return 0;
@@ -1185,9 +1186,15 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), TD))
return ReplaceInstUsesWith(I, V);
- if (isa<Constant>(RHS) && isa<PHINode>(LHS))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
+ if (isa<Constant>(RHS)) {
+ if (isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
+ if (Instruction *NV = FoldOpIntoSelect(I, SI))
+ return NV;
+ }
// -A + B --> B - A
// -A + -B --> -(A + B)
@@ -1516,9 +1523,33 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), TD))
return ReplaceInstUsesWith(I, V);
- // If this is a 'B = x-(-A)', change to B = x+A...
- if (Value *V = dyn_castFNegVal(Op1))
- return BinaryOperator::CreateFAdd(Op0, V);
+ if (isa<Constant>(Op0))
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *NV = FoldOpIntoSelect(I, SI))
+ return NV;
+
+ // If this is a 'B = x-(-A)', change to B = x+A, potentially looking
+ // through FP extensions/truncations along the way.
+ if (Value *V = dyn_castFNegVal(Op1)) {
+ Instruction *NewI = BinaryOperator::CreateFAdd(Op0, V);
+ NewI->copyFastMathFlags(&I);
+ return NewI;
+ }
+ if (FPTruncInst *FPTI = dyn_cast<FPTruncInst>(Op1)) {
+ if (Value *V = dyn_castFNegVal(FPTI->getOperand(0))) {
+ Value *NewTrunc = Builder->CreateFPTrunc(V, I.getType());
+ Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewTrunc);
+ NewI->copyFastMathFlags(&I);
+ return NewI;
+ }
+ } else if (FPExtInst *FPEI = dyn_cast<FPExtInst>(Op1)) {
+ if (Value *V = dyn_castFNegVal(FPEI->getOperand(0))) {
+ Value *NewExt = Builder->CreateFPExt(V, I.getType());
+ Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewExt);
+ NewI->copyFastMathFlags(&I);
+ return NewI;
+ }
+ }
if (I.hasUnsafeAlgebra()) {
if (Value *V = FAddCombine(Builder).simplify(&I))
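
The fsub rewrites above rest on 'x - (-a)' and 'x + a' rounding identically in IEEE arithmetic (negation only flips the sign bit), so the fold is exact even without fast-math; the flags are copied only to preserve whatever latitude the original fsub already had. A standalone spot-check of the identity:

    #include <cassert>

    int main() {
      // Exact for finite IEEE values: both forms perform one rounded
      // addition of x and a.
      double x = 1.5, a = 0.25;
      assert(x - (-a) == x + a);
      return 0;
    }
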
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index ec75dd2..88bb69b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -173,14 +173,14 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
// Adding a one to a single bit bit-field should be turned into an XOR
// of the bit. First thing to check is to see if this AND is with a
// single bit constant.
- const APInt &AndRHSV = cast<ConstantInt>(AndRHS)->getValue();
+ const APInt &AndRHSV = AndRHS->getValue();
// If there is only one bit set.
if (AndRHSV.isPowerOf2()) {
// Ok, at this point, we know that we are masking the result of the
// ADD down to exactly one bit. If the constant we are adding has
// no bits set below this bit, then we can eliminate the ADD.
- const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();
+ const APInt& AddRHS = OpRHS->getValue();
// Check to see if any bits below the one bit set in AndRHSV are set.
if ((AddRHS & (AndRHSV-1)) == 0) {
@@ -209,8 +209,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
- ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
- AndRHS->getValue() & ShlMask);
+ ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShlMask);
if (CI->getValue() == ShlMask)
// Masking out bits that the shift already masks.
@@ -230,8 +229,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
- ConstantInt *CI = ConstantInt::get(Op->getContext(),
- AndRHS->getValue() & ShrMask);
+ ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShrMask);
if (CI->getValue() == ShrMask)
// Masking out bits that the shift already masks.
@@ -251,8 +249,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
- Constant *C = ConstantInt::get(Op->getContext(),
- AndRHS->getValue() & ShrMask);
+ Constant *C = Builder->getInt(AndRHS->getValue() & ShrMask);
if (C == AndRHS) { // Masking out bits shifted in.
// (Val ashr C1) & C2 -> (Val lshr C1) & C2
// Make the argument unsigned.
@@ -279,7 +276,7 @@ Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
if (Inside) {
if (Lo == Hi) // Trivially false.
- return ConstantInt::getFalse(V->getContext());
+ return Builder->getFalse();
// V >= Min && V < Hi --> V < Hi
if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
@@ -296,7 +293,7 @@ Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
}
if (Lo == Hi) // Trivially true.
- return ConstantInt::getTrue(V->getContext());
+ return Builder->getTrue();
// V < Min || V >= Hi -> V > Hi-1
Hi = SubOne(cast<ConstantInt>(Hi));
@@ -491,6 +488,26 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
return result;
}
+/// Convert an analysis of a masked ICmp into its equivalent if all boolean
+/// operations had the opposite sense. Since each "NotXXX" flag (recording !=)
+/// is adjacent to the corresponding normal flag (recording ==), this just
+/// involves swapping those bits over.
+static unsigned conjugateICmpMask(unsigned Mask) {
+ unsigned NewMask;
+ NewMask = (Mask & (FoldMskICmp_AMask_AllOnes | FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed |
+ FoldMskICmp_BMask_Mixed))
+ << 1;
+
+ NewMask |=
+ (Mask & (FoldMskICmp_AMask_NotAllOnes | FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_AMask_NotMixed |
+ FoldMskICmp_BMask_NotMixed))
+ >> 1;
+
+ return NewMask;
+}
+
/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z)
/// if possible. The returned predicate is either == or !=. Returns false if
/// decomposition fails.
@@ -551,14 +568,22 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
L21 = L22 = L1 = 0;
} else {
// Look for ANDs in the LHS icmp.
- if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
- if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
- L21 = L22 = 0;
- } else {
- if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
- return 0;
- std::swap(L1, L2);
+ if (!L1->getType()->isIntegerTy()) {
+ // You can icmp pointers, for example. They really aren't masks.
+ L11 = L12 = 0;
+ } else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+ // Any icmp can be viewed as being trivially masked; if it allows us to
+ // remove one, it's worth it.
+ L11 = L1;
+ L12 = Constant::getAllOnesValue(L1->getType());
+ }
+
+ if (!L2->getType()->isIntegerTy()) {
+ // You can icmp pointers, for example. They really aren't masks.
L21 = L22 = 0;
+ } else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) {
+ L21 = L2;
+ L22 = Constant::getAllOnesValue(L2->getType());
}
}
@@ -579,7 +604,14 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
return 0;
}
E = R2; R1 = 0; ok = true;
- } else if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ } else if (R1->getType()->isIntegerTy()) {
+ if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ // As before, model no mask as a trivial mask if it'll let us do an
+ // optimisation.
+ R11 = R1;
+ R12 = Constant::getAllOnesValue(R1->getType());
+ }
+
if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
A = R11; D = R12; E = R2; ok = true;
} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
@@ -592,7 +624,12 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
return 0;
   // Look for ANDs on the right side of the RHS icmp.
- if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ if (!ok && R2->getType()->isIntegerTy()) {
+ if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ R11 = R2;
+ R12 = Constant::getAllOnesValue(R2->getType());
+ }
+
if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
A = R11; D = R12; E = R1; ok = true;
} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
@@ -621,8 +658,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
/// foldLogOpOfMaskedICmps:
/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
/// into a single (icmp(A & X) ==/!= Y)
-static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
- ICmpInst::Predicate NEWCC,
+static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
llvm::InstCombiner::BuilderTy* Builder) {
Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -632,8 +668,24 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) &&
"foldLogOpOfMaskedICmpsHelper must return an equality predicate.");
- if (NEWCC == ICmpInst::ICMP_NE)
- mask >>= 1; // treat "Not"-states as normal states
+ // In full generality:
+ // (icmp (A & B) Op C) | (icmp (A & D) Op E)
+ // == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ]
+ //
+ // If the latter can be converted into (icmp (A & X) Op Y) then the former is
+ // equivalent to (icmp (A & X) !Op Y).
+ //
+ // Therefore, we can pretend for the rest of this function that we're dealing
+ // with the conjunction, provided we flip the sense of any comparisons (both
+ // input and output).
+
+ // In most cases we're going to produce an EQ for the "&&" case.
+ ICmpInst::Predicate NEWCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
+ if (!IsAnd) {
+ // Convert the masking analysis into its equivalent with negated
+ // comparisons.
+ mask = conjugateICmpMask(mask);
+ }
if (mask & FoldMskICmp_Mask_AllZeroes) {
// (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
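
conjugateICmpMask works because each "Not" flag is defined one bit above its positive partner, so negating every comparison in the analysis is a pairwise bit swap, and applying it twice is the identity. A standalone illustration using stand-in flag values (not the real FoldMskICmp enumerators):

    #include <cassert>

    enum {
      AMask_AllOnes    = 1 << 0,
      AMask_NotAllOnes = 1 << 1, // partner of AMask_AllOnes
      BMask_AllOnes    = 1 << 2,
      BMask_NotAllOnes = 1 << 3  // partner of BMask_AllOnes
    };

    static unsigned conjugate(unsigned Mask) {
      unsigned Pos = Mask & (AMask_AllOnes | BMask_AllOnes);
      unsigned Neg = Mask & (AMask_NotAllOnes | BMask_NotAllOnes);
      return (Pos << 1) | (Neg >> 1); // swap each flag with its partner
    }

    int main() {
      assert(conjugate(AMask_AllOnes) == AMask_NotAllOnes);
      assert(conjugate(conjugate(BMask_NotAllOnes)) == BMask_NotAllOnes);
      return 0;
    }
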
@@ -660,6 +712,40 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
Value* newAnd = Builder->CreateAnd(A, newAnd1);
return Builder->CreateICmp(NEWCC, newAnd, A);
}
+
+ // Remaining cases assume at least that B and D are constant, and depend on
+  // their actual values. This isn't strictly necessary, just a "handle the
+ // easy cases for now" decision.
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ if (BCst == 0) return 0;
+ ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+ if (DCst == 0) return 0;
+
+ if (mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) {
+ // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
+ // Only valid if one of the masks is a superset of the other (check "B&D" is
+ // the same as either B or D).
+ APInt NewMask = BCst->getValue() & DCst->getValue();
+
+ if (NewMask == BCst->getValue())
+ return LHS;
+ else if (NewMask == DCst->getValue())
+ return RHS;
+ }
+ if (mask & FoldMskICmp_AMask_NotAllOnes) {
+    // (icmp ne (A & B), A) & (icmp ne (A & D), A)
+ // -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
+ // Only valid if one of the masks is a superset of the other (check "B|D" is
+ // the same as either B or D).
+ APInt NewMask = BCst->getValue() | DCst->getValue();
+
+ if (NewMask == BCst->getValue())
+ return LHS;
+ else if (NewMask == DCst->getValue())
+ return RHS;
+ }
if (mask & FoldMskICmp_BMask_Mixed) {
// (icmp eq (A & B), C) & (icmp eq (A & D), E)
// We already know that B & C == C && D & E == E.
@@ -668,14 +754,9 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
// contradict, then we can transform to
// -> (icmp eq (A & (B|D)), (C|E))
// Currently, we only handle the case of B, C, D, and E being constant.
- ConstantInt *BCst = dyn_cast<ConstantInt>(B);
- if (BCst == 0) return 0;
- ConstantInt *DCst = dyn_cast<ConstantInt>(D);
- if (DCst == 0) return 0;
// we can't simply use C and E, because we might actually handle
// (icmp ne (A & B), B) & (icmp eq (A & D), D)
  // with B and D having a single bit set
-
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
if (CCst == 0) return 0;
if (LHSCC != NEWCC)
@@ -718,7 +799,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
// handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
- if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder))
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, true, Builder))
return V;
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
@@ -852,10 +933,15 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
return RHS;
case ICmpInst::ICMP_NE:
+ // Special case to get the ordering right when the values wrap around
+ // zero.
+ if (LHSCst->getValue() == 0 && RHSCst->getValue().isAllOnesValue())
+ std::swap(LHSCst, RHSCst);
if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
Constant *AddCST = ConstantExpr::getNeg(LHSCst);
Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
- return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1));
+ return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1),
+ Val->getName()+".cmp");
}
break; // (X != 13 & X != 15) -> no change
}
@@ -943,7 +1029,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
// If either of the constants are nans, then the whole thing returns
// false.
if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
- return ConstantInt::getFalse(LHS->getContext());
+ return Builder->getFalse();
return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
}
@@ -1302,7 +1388,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
/// always in the local (OverallLeftShift) coordinate space.
///
static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
- SmallVector<Value*, 8> &ByteValues) {
+ SmallVectorImpl<Value *> &ByteValues) {
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If this is an or instruction, it may be an inner node of the bswap.
if (I->getOpcode() == Instruction::Or) {
@@ -1380,7 +1466,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
// into a byteswap. At least one of the two bytes would not be aligned with
// their ultimate destination.
if (!isPowerOf2_32(ByteMask)) return true;
- unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
+ unsigned InputByteNo = countTrailingZeros(ByteMask);
// 2) The input and ultimate destinations must line up: if byte 3 of an i32
// is demanded, it needs to go into byte 0 of the result. This means that the
@@ -1457,10 +1543,60 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
return 0;
}
+/// IsOneHotValue - Returns true for "one-hot" values (values where at most
+/// one bit can be set).
+static bool IsOneHotValue(Value *V) {
+ // Match 1<<K.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
+ if (BO->getOpcode() == Instruction::Shl) {
+ ConstantInt *One = dyn_cast<ConstantInt>(BO->getOperand(0));
+ return One && One->isOne();
+ }
+
+ // Check for power of two integer constants.
+ if (ConstantInt *K = dyn_cast<ConstantInt>(V))
+ return K->getValue().isPowerOf2();
+
+ return false;
+}
+
/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
+ // if K1 and K2 are a one-bit mask.
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+
+ if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero() &&
+ RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) {
+
+ BinaryOperator *LAnd = dyn_cast<BinaryOperator>(LHS->getOperand(0));
+ BinaryOperator *RAnd = dyn_cast<BinaryOperator>(RHS->getOperand(0));
+ if (LAnd && RAnd && LAnd->hasOneUse() && RHS->hasOneUse() &&
+ LAnd->getOpcode() == Instruction::And &&
+ RAnd->getOpcode() == Instruction::And) {
+
+ Value *Mask = 0;
+ Value *Masked = 0;
+ if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
+ IsOneHotValue(LAnd->getOperand(1)) &&
+ IsOneHotValue(RAnd->getOperand(1))) {
+ Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
+ Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
+ } else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
+ IsOneHotValue(LAnd->getOperand(0)) &&
+ IsOneHotValue(RAnd->getOperand(0))) {
+ Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
+ Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
+ }
+
+ if (Masked)
+ return Builder->CreateICmp(ICmpInst::ICMP_NE, Masked, Mask);
+ }
+ }
+
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
if (PredicatesFoldable(LHSCC, RHSCC)) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
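
The one-hot fold added above can be verified exhaustively at a small width: because each constant contributes at most one bit, "either masked value is zero" is the same as "not all mask bits are present in A". An 8-bit sanity check (an illustration, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned k1 = 0; k1 < 8; ++k1)
        for (unsigned k2 = 0; k2 < 8; ++k2)
          for (unsigned a = 0; a < 256; ++a) {
            uint8_t K1 = uint8_t(1u << k1), K2 = uint8_t(1u << k2);
            uint8_t A = uint8_t(a), M = uint8_t(K1 | K2);
            bool Orig   = (A & K1) == 0 || (A & K2) == 0;
            bool Folded = (A & M) != M;
            assert(Orig == Folded);
          }
      return 0;
    }
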
@@ -1477,13 +1613,37 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// handle (roughly):
// (icmp ne (A & B), C) | (icmp ne (A & D), E)
- if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder))
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, false, Builder))
return V;
- // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
- ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
- ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (LHS->hasOneUse() || RHS->hasOneUse()) {
+ // (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
+ // (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1)
+ Value *A = 0, *B = 0;
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero()) {
+ B = Val;
+ if (RHSCC == ICmpInst::ICMP_ULT && Val == RHS->getOperand(1))
+ A = Val2;
+ else if (RHSCC == ICmpInst::ICMP_UGT && Val == Val2)
+ A = RHS->getOperand(1);
+ }
+ // (icmp ult A, B) | (icmp eq B, 0) -> (icmp ule A, B-1)
+ // (icmp ugt B, A) | (icmp eq B, 0) -> (icmp ule A, B-1)
+ else if (RHSCC == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) {
+ B = Val2;
+ if (LHSCC == ICmpInst::ICMP_ULT && Val2 == LHS->getOperand(1))
+ A = Val;
+ else if (LHSCC == ICmpInst::ICMP_UGT && Val2 == Val)
+ A = LHS->getOperand(1);
+ }
+ if (A && B)
+ return Builder->CreateICmp(
+ ICmpInst::ICMP_UGE,
+ Builder->CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A);
+ }
+
+ // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
if (LHSCst == 0 || RHSCst == 0) return 0;
if (LHSCst == RHSCst && LHSCC == RHSCC) {
@@ -1588,7 +1748,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
- return ConstantInt::getTrue(LHS->getContext());
+ return Builder->getTrue();
}
case ICmpInst::ICMP_ULT:
switch (RHSCC) {
@@ -1640,7 +1800,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
break;
case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
- return ConstantInt::getTrue(LHS->getContext());
+ return Builder->getTrue();
case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
break;
}
@@ -1655,7 +1815,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
break;
case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
- return ConstantInt::getTrue(LHS->getContext());
+ return Builder->getTrue();
case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
break;
}
@@ -1676,7 +1836,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
// If either of the constants are nans, then the whole thing returns
// true.
if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
- return ConstantInt::getTrue(LHS->getContext());
+ return Builder->getTrue();
// Otherwise, no need to compare the two constants, compare the
// rest.
@@ -1779,8 +1939,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *Or = Builder->CreateOr(X, RHS);
Or->takeName(Op0);
return BinaryOperator::CreateAnd(Or,
- ConstantInt::get(I.getContext(),
- RHS->getValue() | C1->getValue()));
+ Builder->getInt(RHS->getValue() | C1->getValue()));
}
// (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
@@ -1789,8 +1948,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *Or = Builder->CreateOr(X, RHS);
Or->takeName(Op0);
return BinaryOperator::CreateXor(Or,
- ConstantInt::get(I.getContext(),
- C1->getValue() & ~RHS->getValue()));
+ Builder->getInt(C1->getValue() & ~RHS->getValue()));
}
// Try to fold constant and into select arguments.
@@ -1872,15 +2030,13 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N)
(V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V)
return BinaryOperator::CreateAnd(A,
- ConstantInt::get(A->getContext(),
- C1->getValue()|C2->getValue()));
+ Builder->getInt(C1->getValue()|C2->getValue()));
// Or commutes, try both ways.
if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N)
(V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V)
return BinaryOperator::CreateAnd(B,
- ConstantInt::get(B->getContext(),
- C1->getValue()|C2->getValue()));
+ Builder->getInt(C1->getValue()|C2->getValue()));
// ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
// iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
@@ -1891,8 +2047,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
(C4->getValue() & ~C2->getValue()) == 0) {
V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield");
return BinaryOperator::CreateAnd(V2,
- ConstantInt::get(B->getContext(),
- C1->getValue()|C2->getValue()));
+ Builder->getInt(C1->getValue()|C2->getValue()));
}
}
}
@@ -2160,8 +2315,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (CI->hasOneUse() && Op0C->hasOneUse()) {
Instruction::CastOps Opcode = Op0C->getOpcode();
if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
- (RHS == ConstantExpr::getCast(Opcode,
- ConstantInt::getTrue(I.getContext()),
+ (RHS == ConstantExpr::getCast(Opcode, Builder->getTrue(),
Op0C->getDestTy()))) {
CI->setPredicate(CI->getInversePredicate());
return CastInst::Create(Opcode, CI, Op0C->getType());
@@ -2191,8 +2345,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Op0I->getOperand(0));
} else if (RHS->getValue().isSignBit()) {
// (X + C) ^ signbit -> (X + C + signbit)
- Constant *C = ConstantInt::get(I.getContext(),
- RHS->getValue() + Op0CI->getValue());
+ Constant *C = Builder->getInt(RHS->getValue() + Op0CI->getValue());
return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
}
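
The new unsigned fold in FoldOrOfICmps deliberately leans on wrap-around: when B is zero, B-1 becomes the maximum unsigned value, so the folded comparison is vacuously true and absorbs the (icmp eq B, 0) arm. An exhaustive 8-bit check of the identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b) {
          uint8_t A = uint8_t(a), B = uint8_t(b);
          bool Orig   = (B == 0) || (A < B);  // (B == 0) | (A <u B)
          bool Folded = uint8_t(B - 1) >= A;  // (B - 1) >=u A, wrapping
          assert(Orig == Folded);
        }
      return 0;
    }
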
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 78b4a2c..0cd7b14 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -946,7 +946,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
int ix = FTy->getNumParams();
// See if we can optimize any arguments passed through the varargs area of
// the call.
- for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
+ for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
E = CS.arg_end(); I != E; ++I, ++ix) {
CastInst *CI = dyn_cast<CastInst>(*I);
if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
@@ -999,19 +999,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Check to see if we are changing the return type...
if (OldRetTy != NewRetTy) {
- if (Callee->isDeclaration() &&
- // Conversion is ok if changing from one pointer type to another or from
- // a pointer to an integer of the same size.
- !((OldRetTy->isPointerTy() || !TD ||
- OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
- (NewRetTy->isPointerTy() || !TD ||
- NewRetTy == TD->getIntPtrType(Caller->getContext()))))
- return false; // Cannot transform this return value.
+ if (!CastInst::isBitCastable(NewRetTy, OldRetTy)) {
+ if (Callee->isDeclaration())
+ return false; // Cannot transform this return value.
- if (!Caller->use_empty() &&
- // void -> non-void is handled specially
- !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
+ if (!Caller->use_empty() &&
+ // void -> non-void is handled specially
+ !NewRetTy->isVoidTy())
return false; // Cannot transform this return value.
+ }
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
@@ -1036,7 +1032,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false;
}
- unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+ unsigned NumActualArgs = CS.arg_size();
unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
CallSite::arg_iterator AI = CS.arg_begin();
@@ -1044,7 +1040,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Type *ParamTy = FT->getParamType(i);
Type *ActTy = (*AI)->getType();
- if (!CastInst::isCastable(ActTy, ParamTy))
+ if (!CastInst::isBitCastable(ActTy, ParamTy))
return false; // Cannot transform this parameter value.
if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
@@ -1061,20 +1057,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
return false;
- Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
+ Type *CurElTy = ActTy->getPointerElementType();
if (TD->getTypeAllocSize(CurElTy) !=
TD->getTypeAllocSize(ParamPTy->getElementType()))
return false;
}
-
- // Converting from one pointer type to another or between a pointer and an
- // integer of the same size is safe even if we do not have a body.
- bool isConvertible = ActTy == ParamTy ||
- (TD && ((ParamTy->isPointerTy() ||
- ParamTy == TD->getIntPtrType(Caller->getContext())) &&
- (ActTy->isPointerTy() ||
- ActTy == TD->getIntPtrType(Caller->getContext()))));
- if (Callee->isDeclaration() && !isConvertible) return false;
}
if (Callee->isDeclaration()) {
@@ -1141,12 +1128,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
AI = CS.arg_begin();
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
Type *ParamTy = FT->getParamType(i);
+
if ((*AI)->getType() == ParamTy) {
Args.push_back(*AI);
} else {
- Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
- false, ParamTy, false);
- Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy));
+ Args.push_back(Builder->CreateBitCast(*AI, ParamTy));
}
// Add any parameter attributes.
@@ -1217,9 +1203,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Value *NV = NC;
if (OldRetTy != NV->getType() && !Caller->use_empty()) {
if (!NV->getType()->isVoidTy()) {
- Instruction::CastOps opcode =
- CastInst::getCastOpcode(NC, false, OldRetTy, false);
- NV = NC = CastInst::Create(opcode, NC, OldRetTy);
+ NV = NC = CastInst::Create(CastInst::BitCast, NC, OldRetTy);
NC->setDebugLoc(Caller->getDebugLoc());
// If this is an invoke instruction, we should insert it after the first
@@ -1287,7 +1271,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
if (NestTy) {
Instruction *Caller = CS.getInstruction();
std::vector<Value*> NewArgs;
- NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
+ NewArgs.reserve(CS.arg_size() + 1);
SmallVector<AttributeSet, 8> NewAttrs;
NewAttrs.reserve(Attrs.getNumSlots() + 1);
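
The call-site changes above replace the ad-hoc pointer/intptr reasoning with CastInst::isBitCastable, so transformConstExprCastCall only rewrites types that convert losslessly via a plain bitcast. A sketch of the tightened predicate (an illustrative wrapper around the real API, not part of the patch):

    #include "llvm/IR/InstrTypes.h"

    // Bitcast-compatible means same-sized first-class types, e.g. one pointer
    // type to another in the same address space; the old ptrtoint/inttoptr
    // style conversions between pointers and integers no longer qualify.
    static bool canRewriteReturn(llvm::Type *NewRetTy, llvm::Type *OldRetTy) {
      return llvm::CastInst::isBitCastable(NewRetTy, OldRetTy);
    }
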
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 2ee1278..72377dc 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -677,7 +677,6 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
- case Instruction::Shl:
if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) ||
!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp))
return false;
@@ -701,6 +700,17 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
// Otherwise, we don't know how to analyze this BitsToClear case yet.
return false;
+ case Instruction::Shl:
+    // We can promote shl(x, cst) if we can promote x. Since shl shifts the
+    // original high bits out of the value, we can reduce BitsToClear by the
+    // shift amount.
+ if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+ return false;
+ uint64_t ShiftAmt = Amt->getZExtValue();
+ BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
+ return true;
+ }
+ return false;
case Instruction::LShr:
// We can promote lshr(x, cst) if we can promote x. This requires the
// ultimate 'and' to clear out the high zero bits we're clearing out though.
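
The new Shl case does simple bookkeeping: bits above the dirty region are already clean, and a left shift pushes the dirty region up and partially out of the value, so at most BitsToClear - ShiftAmt dirty bits survive. A worked example:

    #include <cassert>

    int main() {
      // zext-promoting shl(x, 3) where evaluating x in the wide type would
      // leave its top 8 bits dirty: the shift pushes 3 dirty bits out the
      // top, so only 5 high bits still need clearing afterwards.
      unsigned BitsToClear = 8, ShiftAmt = 3;
      BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
      assert(BitsToClear == 5);
      return 0;
    }
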
@@ -1219,6 +1229,19 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
}
+ // (fptrunc (select cond, R1, Cst)) -->
+ // (select cond, (fptrunc R1), (fptrunc Cst))
+ SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0));
+ if (SI &&
+ (isa<ConstantFP>(SI->getOperand(1)) ||
+ isa<ConstantFP>(SI->getOperand(2)))) {
+ Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1),
+ CI.getType());
+ Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2),
+ CI.getType());
+ return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc);
+ }
+
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0));
if (II) {
switch (II->getIntrinsicID()) {
@@ -1239,9 +1262,14 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
// Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
+ // Note that we restrict this transformation based on
+ // TLI->has(LibFunc::sqrtf), even for the sqrt intrinsic, because
+ // TLI->has(LibFunc::sqrtf) is sufficient to guarantee that the
+ // single-precision intrinsic can be expanded in the backend.
CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
- Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) &&
+ (Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) ||
+ Call->getCalledFunction()->getIntrinsicID() == Intrinsic::sqrt) &&
Call->getNumArgOperands() == 1 &&
Call->hasOneUse()) {
CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
@@ -1252,11 +1280,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Arg->getOperand(0)->getType()->isFloatTy()) {
Function *Callee = Call->getCalledFunction();
Module *M = CI.getParent()->getParent()->getParent();
- Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
- Callee->getAttributes(),
- Builder->getFloatTy(),
- Builder->getFloatTy(),
- NULL);
+ Constant *SqrtfFunc = (Callee->getIntrinsicID() == Intrinsic::sqrt) ?
+ Intrinsic::getDeclaration(M, Intrinsic::sqrt, Builder->getFloatTy()) :
+ M->getOrInsertFunction("sqrtf", Callee->getAttributes(),
+ Builder->getFloatTy(), Builder->getFloatTy(),
+ NULL);
CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
"sqrtfcall");
ret->setAttributes(Callee->getAttributes());
@@ -1328,14 +1356,18 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
- if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() !=
- TD->getPointerSizeInBits()) {
- Type *Ty = TD->getIntPtrType(CI.getContext());
- if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
-
- Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
- return new IntToPtrInst(P, CI.getType());
+
+ if (TD) {
+ unsigned AS = CI.getAddressSpace();
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+ TD->getPointerSizeInBits(AS)) {
+ Type *Ty = TD->getIntPtrType(CI.getContext(), AS);
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+ return new IntToPtrInst(P, CI.getType());
+ }
}
if (Instruction *I = commonCastTransforms(CI))
@@ -1360,25 +1392,32 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
return &CI;
}
+ if (!TD)
+ return commonCastTransforms(CI);
+
// If the GEP has a single use, and the base pointer is a bitcast, and the
// GEP computes a constant offset, see if we can convert these three
// instructions into fewer. This typically happens with unions and other
// non-type-safe code.
- APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
- if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
+ unsigned AS = GEP->getPointerAddressSpace();
+ unsigned OffsetBits = TD->getPointerSizeInBits(AS);
+ APInt Offset(OffsetBits, 0);
+ BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0));
+ if (GEP->hasOneUse() &&
+ BCI &&
GEP->accumulateConstantOffset(*TD, Offset)) {
// Get the base pointer input of the bitcast, and the type it points to.
- Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
- Type *GEPIdxTy =
- cast<PointerType>(OrigBase->getType())->getElementType();
+ Value *OrigBase = BCI->getOperand(0);
SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(GEPIdxTy, Offset.getSExtValue(), NewIndices)) {
+ if (FindElementAtOffset(OrigBase->getType(),
+ Offset.getSExtValue(),
+ NewIndices)) {
// If we were able to index down into an element, create the GEP
// and bitcast the result. This eliminates one bitcast, potentially
// two.
Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
- Builder->CreateGEP(OrigBase, NewIndices);
+ Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
+ Builder->CreateGEP(OrigBase, NewIndices);
NGEP->takeName(GEP);
if (isa<BitCastInst>(CI))
@@ -1396,16 +1435,22 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// If the destination integer type is not the intptr_t type for this target,
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
- if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) {
- Type *Ty = TD->getIntPtrType(CI.getContext());
- if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
- Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty);
- return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false);
- }
+ if (!TD)
+ return commonPointerCastTransforms(CI);
+
+ Type *Ty = CI.getType();
+ unsigned AS = CI.getPointerAddressSpace();
+
+ if (Ty->getScalarSizeInBits() == TD->getPointerSizeInBits(AS))
+ return commonPointerCastTransforms(CI);
- return commonPointerCastTransforms(CI);
+ Type *PtrTy = TD->getIntPtrType(CI.getContext(), AS);
+ if (Ty->isVectorTy()) // Handle vectors of pointers.
+ PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
+
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy);
+ return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
}
/// OptimizeVectorResize - This input value (which is known to have vector type)
@@ -1478,12 +1523,17 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
/// insertions into the vector. See the example in the comment for
/// OptimizeIntegerToVectorInsertions for the pattern this handles.
/// The type of V is always a non-zero multiple of VecEltTy's size.
+/// Shift is the number of bits between the lsb of V and the lsb of
+/// the vector.
///
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
-static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
+static bool CollectInsertionElements(Value *V, unsigned Shift,
SmallVectorImpl<Value*> &Elements,
- Type *VecEltTy) {
+ Type *VecEltTy, InstCombiner &IC) {
+ assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
+ "Shift should be a multiple of the element type size");
+
// Undef values never contribute useful bits to the result.
if (isa<UndefValue>(V)) return true;
@@ -1495,8 +1545,12 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (C->isNullValue())
return true;
+ unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
+ if (IC.getDataLayout()->isBigEndian())
+ ElementIndex = Elements.size() - ElementIndex - 1;
+
// Fail if multiple elements are inserted into this slot.
- if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+ if (Elements[ElementIndex] != 0)
return false;
Elements[ElementIndex] = V;
@@ -1512,7 +1566,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
// it to the right type so it gets properly inserted.
if (NumElts == 1)
return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
- ElementIndex, Elements, VecEltTy);
+ Shift, Elements, VecEltTy, IC);
// Okay, this is a constant that covers multiple elements. Slice it up into
// pieces and insert each element-sized piece into the vector.
@@ -1523,10 +1577,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned ShiftI = Shift+i*ElementSize;
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
- i*ElementSize));
+ ShiftI));
Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
- if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+ if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC))
return false;
}
return true;
@@ -1539,29 +1594,28 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC);
case Instruction::Or:
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy) &&
- CollectInsertionElements(I->getOperand(1), ElementIndex,
- Elements, VecEltTy);
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC) &&
+ CollectInsertionElements(I->getOperand(1), Shift,
+ Elements, VecEltTy, IC);
case Instruction::Shl: {
// Must be shifting by a constant that is a multiple of the element size.
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
if (CI == 0) return false;
- if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
- unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
-
- return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
- Elements, VecEltTy);
+ Shift += CI->getZExtValue();
+ if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC);
}
}
@@ -1584,12 +1638,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
/// Into two insertelements that do "buildvector{%inc, %inc5}".
static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
+ // We need to know the target byte order to perform this optimization.
+ if (!IC.getDataLayout()) return 0;
+
VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
if (!CollectInsertionElements(IntInput, 0, Elements,
- DestVecTy->getElementType()))
+ DestVecTy->getElementType(), IC))
return 0;
// If we succeeded, we know that all of the element are specified by Elements
@@ -1775,10 +1832,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// Okay, we have (bitcast (shuffle ..)). Check to see if this is
// a bitcast to a vector with the same # elts.
if (SVI->hasOneUse() && DestTy->isVectorTy() &&
- cast<VectorType>(DestTy)->getNumElements() ==
- SVI->getType()->getNumElements() &&
+ DestTy->getVectorNumElements() == SVI->getType()->getNumElements() &&
SVI->getType()->getNumElements() ==
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
+ SVI->getOperand(0)->getType()->getVectorNumElements()) {
BitCastInst *Tmp;
// If either of the operands is a cast from CI.getType(), then
// evaluating the shuffle in the casted destination's type will allow
@@ -1800,3 +1856,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
}
+
+Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
+ return commonCastTransforms(CI);
+}
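
Threading a bit offset (Shift) through CollectInsertionElements, instead of an element index, is what makes the endianness fix above possible: the offset is byte-order independent, and only its final mapping to a vector slot depends on the target. A standalone sketch of that mapping:

    #include <cassert>

    static unsigned elementIndex(unsigned Shift, unsigned EltBits,
                                 unsigned NumElts, bool BigEndian) {
      unsigned Idx = Shift / EltBits;  // little-endian slot
      return BigEndian ? NumElts - Idx - 1 : Idx;
    }

    int main() {
      // i32 -> <4 x i8>: the byte at bit offset 0 lands in element 0 on a
      // little-endian target but element 3 on a big-endian one.
      assert(elementIndex(0, 8, 4, /*BigEndian=*/false) == 0);
      assert(elementIndex(0, 8, 4, /*BigEndian=*/true) == 3);
      return 0;
    }
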
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 4c252c0..9bb65ef 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -227,7 +227,8 @@ Instruction *InstCombiner::
FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
CmpInst &ICI, ConstantInt *AndCst) {
// We need TD information to know the pointer size unless this is inbounds.
- if (!GEP->isInBounds() && TD == 0) return 0;
+ if (!GEP->isInBounds() && TD == 0)
+ return 0;
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
@@ -393,16 +394,19 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// If the index is larger than the pointer size of the target, truncate the
// index down like the GEP would do implicitly. We don't have to do this for
// an inbounds GEP because the index can't be out of range.
- if (!GEP->isInBounds() &&
- Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
- Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
+ if (!GEP->isInBounds()) {
+ Type *IntPtrTy = TD->getIntPtrType(GEP->getType());
+ unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
+ if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
+ Idx = Builder->CreateTrunc(Idx, IntPtrTy);
+ }
// If the comparison is only true for one or two elements, emit direct
// comparisons.
if (SecondTrueElement != Overdefined) {
// None true -> false.
if (FirstTrueElement == Undefined)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
@@ -422,7 +426,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
if (SecondFalseElement != Overdefined) {
// None false -> true.
if (FirstFalseElement == Undefined)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
@@ -562,16 +566,18 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
}
}
+
+
// Okay, we know we have a single variable index, which must be a
// pointer/array/vector index. If there is no offset, life is simple, return
// the index.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ Type *IntPtrTy = TD.getIntPtrType(GEP->getOperand(0)->getType());
+ unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth();
if (Offset == 0) {
// Cast to intptrty in case a truncation occurs. If an extension is needed,
// we don't need to bother extending: the extension won't affect where the
// computation crosses zero.
if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
- Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy);
}
return VariableIdx;
@@ -593,7 +599,6 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
return 0;
// Okay, we can do this evaluation. Start by converting the index to intptr.
- Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
if (VariableIdx->getType() != IntPtrTy)
VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy,
true /*Signed*/);
@@ -647,8 +652,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// If all indices are the same, just compare the base pointers.
if (IndicesTheSame)
- return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
- GEPLHS->getOperand(0), GEPRHS->getOperand(0));
+ return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
// If we're comparing GEPs with two base pointers that only differ in type
// and both GEPs have only constant indices or just one use, then fold
@@ -679,7 +683,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
if (AllZeros)
return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
- ICmpInst::getSwappedPredicate(Cond), I);
+ ICmpInst::getSwappedPredicate(Cond), I);
// If the other GEP has all zero indices, recurse.
AllZeros = true;
@@ -712,8 +716,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
if (NumDifferences == 0) // SAME GEP?
return ReplaceInstUsesWith(I, // No comparison is needed here.
- ConstantInt::get(Type::getInt1Ty(I.getContext()),
- ICmpInst::isTrueWhenEqual(Cond)));
+ Builder->getInt1(ICmpInst::isTrueWhenEqual(Cond)));
else if (NumDifferences == 1 && GEPsInBounds) {
Value *LHSV = GEPLHS->getOperand(DiffOperand);
@@ -739,10 +742,9 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
-Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
+Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI,
Value *X, ConstantInt *CI,
- ICmpInst::Predicate Pred,
- Value *TheAdd) {
+ ICmpInst::Predicate Pred) {
// If we have X+0, exit early (simplifying logic below) and let it get folded
// elsewhere. icmp X+0, X -> icmp X, X
if (CI->isZero()) {
@@ -752,11 +754,11 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// (X+4) == X -> false.
if (Pred == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
// (X+4) != X -> true.
if (Pred == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
// From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
// so the values can never be equal. Similarly for all other "or equals"
@@ -798,7 +800,7 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
- Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
+ Constant *C = Builder->getInt(CI->getValue()-1);
return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
}
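
The signed cases of FoldICmpAddOpCst rely on the identity '(X + C) >s X iff X <s SMAX - (C - 1)', with all constant arithmetic wrapping, which covers both the non-overflowing case and the wrap-around case in one comparison. An exhaustive 8-bit check of the sgt form (assuming two's-complement wrapping on the narrowing casts, as on LLVM hosts):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int c = -128; c < 128; ++c) {
        if (c == 0) continue; // X+0 is handled separately above
        for (int x = -128; x < 128; ++x) {
          int8_t X = int8_t(x), C = int8_t(c);
          bool Orig = int8_t(X + C) > X;                   // (X + C) >s X
          int8_t Bound = int8_t(INT8_MAX - int8_t(C - 1)); // SMAX - (C-1)
          bool Folded = X < Bound;                         // X <s Bound
          assert(Orig == Folded);
        }
      }
      return 0;
    }
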
@@ -921,7 +923,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
default: llvm_unreachable("Unhandled icmp opcode!");
case ICmpInst::ICMP_EQ:
if (LoOverflow && HiOverflow)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
if (HiOverflow)
return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
ICmpInst::ICMP_UGE, X, LoBound);
@@ -932,7 +934,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
DivIsSigned, true));
case ICmpInst::ICMP_NE:
if (LoOverflow && HiOverflow)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
if (HiOverflow)
return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
ICmpInst::ICMP_ULT, X, LoBound);
@@ -944,16 +946,16 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_SLT:
if (LoOverflow == +1) // Low bound is greater than input range.
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
if (LoOverflow == -1) // Low bound is less than input range.
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
return new ICmpInst(Pred, X, LoBound);
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_SGT:
if (HiOverflow == +1) // High bound greater than input range.
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
if (HiOverflow == -1) // High bound less than input range.
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
if (Pred == ICmpInst::ICMP_UGT)
return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
@@ -1017,7 +1019,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
// If we are comparing against bits always shifted out, the
// comparison cannot succeed.
APInt Comp = CmpRHSV << ShAmtVal;
- ConstantInt *ShiftedCmpRHS = ConstantInt::get(ICI.getContext(), Comp);
+ ConstantInt *ShiftedCmpRHS = Builder->getInt(Comp);
if (Shr->getOpcode() == Instruction::LShr)
Comp = Comp.lshr(ShAmtVal);
else
@@ -1025,8 +1027,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
- Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
- IsICMP_NE);
+ Constant *Cst = Builder->getInt1(IsICMP_NE);
return ReplaceInstUsesWith(ICI, Cst);
}
@@ -1039,7 +1040,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
if (Shr->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
- Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
+ Constant *Mask = Builder->getInt(Val);
Value *And = Builder->CreateAnd(Shr->getOperand(0),
Mask, Shr->getName()+".mask");
@@ -1072,7 +1073,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
APInt NewRHS = RHS->getValue().zext(SrcBits);
NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits-DstBits);
return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
- ConstantInt::get(ICI.getContext(), NewRHS));
+ Builder->getInt(NewRHS));
}
}
break;
@@ -1115,8 +1116,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
? ICI.getUnsignedPredicate()
: ICI.getSignedPredicate();
return new ICmpInst(Pred, LHSI->getOperand(0),
- ConstantInt::get(ICI.getContext(),
- RHSV ^ SignBit));
+ Builder->getInt(RHSV ^ SignBit));
}
// (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
@@ -1127,10 +1127,21 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
: ICI.getSignedPredicate();
Pred = ICI.getSwappedPredicate(Pred);
return new ICmpInst(Pred, LHSI->getOperand(0),
- ConstantInt::get(ICI.getContext(),
- RHSV ^ NotSignBit));
+ Builder->getInt(RHSV ^ NotSignBit));
}
}
+
+ // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C)
+ // iff -C is a power of 2
+ if (ICI.getPredicate() == ICmpInst::ICMP_UGT &&
+ XorCST->getValue() == ~RHSV && (RHSV + 1).isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), XorCST);
+
+ // (icmp ult (xor X, C), -C) -> (icmp uge X, C)
+ // iff -C is a power of 2
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT &&
+ XorCST->getValue() == -RHSV && RHSV.isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), XorCST);
}
break;
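Both xor folds above can be sanity-checked exhaustively at a small bit
width. A minimal standalone sketch (illustration only, not part of the
patch; C = 0xFC is an arbitrary constant chosen so that -C == 4 is a
power of 2):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 0xFC; // -C == 4, a power of 2
  for (unsigned X = 0; X < 256; ++X) {
    uint8_t x = (uint8_t)X;
    // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C)
    assert(((uint8_t)(x ^ C) > (uint8_t)~C) == (x < C));
    // (icmp ult (xor X, C), -C) -> (icmp uge X, C)
    assert(((uint8_t)(x ^ C) < (uint8_t)-C) == (x >= C));
  }
  return 0;
}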
case Instruction::And: // (icmp pred (and X, AndCST), RHS)
@@ -1187,11 +1198,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
Type *AndTy = AndCST->getType(); // Type of the and.
// We can fold this as long as we can't shift unknown bits
- // into the mask. This can only happen with signed shift
- // rights, as they sign-extend.
+ // into the mask. This can happen with signed right
+ // shifts, as they sign-extend. With logical shifts,
+ // we must still make sure the comparison is not signed
+ // because we are effectively changing the
+ // position of the sign bit (PR17827).
+ // TODO: We can relax these constraints a bit more.
if (ShAmt) {
- bool CanFold = Shift->isLogicalShift();
- if (!CanFold) {
+ bool CanFold = false;
+ unsigned ShiftOpcode = Shift->getOpcode();
+ if (ShiftOpcode == Instruction::AShr) {
// To test for the bad case of the signed shr, see if any
// of the bits shifted in could be tested after the mask.
uint32_t TyBits = Ty->getPrimitiveSizeInBits();
@@ -1201,6 +1217,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
AndCST->getValue()) == 0)
CanFold = true;
+ } else if (ShiftOpcode == Instruction::Shl ||
+ ShiftOpcode == Instruction::LShr) {
+ CanFold = !ICI.isSigned();
}
if (CanFold) {
@@ -1218,11 +1237,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// As a special case, check to see if this means that the
// result is always true or false now.
if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(ICI,
- ConstantInt::getFalse(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
if (ICI.getPredicate() == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(ICI,
- ConstantInt::getTrue(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
} else {
ICI.setOperand(1, NewCst);
Constant *NewAndCST;
@@ -1284,6 +1301,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return Res;
}
}
+
+ // X & -C == -C -> X >u ~C
+ // X & -C != -C -> X <=u ~C
+ // iff C is a power of 2
+ if (ICI.isEquality() && RHS == LHSI->getOperand(1) && (-RHSV).isPowerOf2())
+ return new ICmpInst(
+ ICI.getPredicate() == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_UGT
+ : ICmpInst::ICMP_ULE,
+ LHSI->getOperand(0), SubOne(RHS));
break;
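The new equality fold admits the same kind of exhaustive 8-bit check,
here with the arbitrary power of 2 C == 4 (so -C == 0xFC and ~C == 0xFB);
note that SubOne applied to -C yields exactly ~C. A sketch, not patch
code:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 4;
  for (unsigned X = 0; X < 256; ++X) {
    uint8_t x = (uint8_t)X;
    bool EQ = (uint8_t)(x & (uint8_t)-C) == (uint8_t)-C;
    assert(EQ == (x > (uint8_t)~C));   // X & -C == -C  ->  X >u ~C
    assert(!EQ == (x <= (uint8_t)~C)); // X & -C != -C  ->  X <=u ~C
  }
  return 0;
}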
case Instruction::Or: {
@@ -1325,10 +1351,80 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
+ uint32_t TypeBits = RHSV.getBitWidth();
ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
- if (!ShAmt) break;
+ if (!ShAmt) {
+ Value *X;
+ // (1 << X) pred P2 -> X pred Log2(P2)
+ if (match(LHSI, m_Shl(m_One(), m_Value(X)))) {
+ bool RHSVIsPowerOf2 = RHSV.isPowerOf2();
+ ICmpInst::Predicate Pred = ICI.getPredicate();
+ if (ICI.isUnsigned()) {
+ if (!RHSVIsPowerOf2) {
+ // (1 << X) < 30 -> X <= 4
+ // (1 << X) <= 30 -> X <= 4
+ // (1 << X) >= 30 -> X > 4
+ // (1 << X) > 30 -> X > 4
+ if (Pred == ICmpInst::ICMP_ULT)
+ Pred = ICmpInst::ICMP_ULE;
+ else if (Pred == ICmpInst::ICMP_UGE)
+ Pred = ICmpInst::ICMP_UGT;
+ }
+ unsigned RHSLog2 = RHSV.logBase2();
+
+ // (1 << X) >= 2147483648 -> X >= 31 -> X == 31
+ // (1 << X) > 2147483648 -> X > 31 -> false
+ // (1 << X) <= 2147483648 -> X <= 31 -> true
+ // (1 << X) < 2147483648 -> X < 31 -> X != 31
+ if (RHSLog2 == TypeBits-1) {
+ if (Pred == ICmpInst::ICMP_UGE)
+ Pred = ICmpInst::ICMP_EQ;
+ else if (Pred == ICmpInst::ICMP_UGT)
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
+ else if (Pred == ICmpInst::ICMP_ULE)
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
+ else if (Pred == ICmpInst::ICMP_ULT)
+ Pred = ICmpInst::ICMP_NE;
+ }
- uint32_t TypeBits = RHSV.getBitWidth();
+ return new ICmpInst(Pred, X,
+ ConstantInt::get(RHS->getType(), RHSLog2));
+ } else if (ICI.isSigned()) {
+ if (RHSV.isAllOnesValue()) {
+ // (1 << X) <= -1 -> X == 31
+ if (Pred == ICmpInst::ICMP_SLE)
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(RHS->getType(), TypeBits-1));
+
+ // (1 << X) > -1 -> X != 31
+ if (Pred == ICmpInst::ICMP_SGT)
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(RHS->getType(), TypeBits-1));
+ } else if (!RHSV) {
+ // (1 << X) < 0 -> X == 31
+ // (1 << X) <= 0 -> X == 31
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(RHS->getType(), TypeBits-1));
+
+ // (1 << X) >= 0 -> X != 31
+ // (1 << X) > 0 -> X != 31
+ if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(RHS->getType(), TypeBits-1));
+ }
+ } else if (ICI.isEquality()) {
+ if (RHSVIsPowerOf2)
+ return new ICmpInst(
+ Pred, X, ConstantInt::get(RHS->getType(), RHSV.logBase2()));
+
+ return ReplaceInstUsesWith(
+ ICI, Pred == ICmpInst::ICMP_EQ ? Builder->getFalse()
+ : Builder->getTrue());
+ }
+ }
+ break;
+ }
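The unsigned and equality branches of the (1 << X) folds are easy to
spot-check at 8 bits; 30 (whose logBase2 is 4) and 16 are arbitrary
example constants, and this sketch is illustrative rather than patch
code:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 8; ++X) {
    uint8_t Shl = (uint8_t)(1u << X);
    assert((Shl < 30) == (X <= 4));  // (1 << X) <u 30  -> X <=u 4
    assert((Shl <= 30) == (X <= 4)); // (1 << X) <=u 30 -> X <=u 4
    assert((Shl > 30) == (X > 4));   // (1 << X) >u 30  -> X >u 4
    assert((Shl >= 30) == (X > 4));  // (1 << X) >=u 30 -> X >u 4
    assert((Shl == 16) == (X == 4)); // (1 << X) == 16  -> X == 4
  }
  return 0;
}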
// Check that the shift amount is in range. If not, don't perform
// undefined shifts. When the shift is visited it will be
@@ -1344,8 +1440,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
ShAmt);
if (Comp != RHS) {// Comparing against a bit that we know is zero.
bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
- Constant *Cst =
- ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE);
+ Constant *Cst = Builder->getInt1(IsICMP_NE);
return ReplaceInstUsesWith(ICI, Cst);
}
@@ -1364,9 +1459,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (LHSI->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
- Constant *Mask =
- ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits,
- TypeBits-ShAmtVal));
+ Constant *Mask = Builder->getInt(APInt::getLowBitsSet(TypeBits,
+ TypeBits - ShAmtVal));
Value *And =
Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
@@ -1451,6 +1545,30 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return R;
break;
+ case Instruction::Sub: {
+ ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(0));
+ if (!LHSC) break;
+ const APInt &LHSV = LHSC->getValue();
+
+ // C1-X <u C2 -> (X|(C2-1)) == C1
+ // iff C1 & (C2-1) == C2-1
+ // C2 is a power of 2
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() &&
+ RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == (RHSV - 1))
+ return new ICmpInst(ICmpInst::ICMP_EQ,
+ Builder->CreateOr(LHSI->getOperand(1), RHSV - 1),
+ LHSC);
+
+ // C1-X >u C2 -> (X|C2) != C1
+ // iff C1 & C2 == C2
+ // C2+1 is a power of 2
+ if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() &&
+ (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == RHSV)
+ return new ICmpInst(ICmpInst::ICMP_NE,
+ Builder->CreateOr(LHSI->getOperand(1), RHSV), LHSC);
+ break;
+ }
+
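Both subtract folds hold under the stated side conditions; an exhaustive
8-bit check with arbitrary constants C1 == 0x17 (low two bits set) and
C2 == 4 for the first fold, and C2 == 3 for the second (illustration
only):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C1 = 0x17;
  for (unsigned X = 0; X < 256; ++X) {
    uint8_t x = (uint8_t)X;
    // C1-X <u 4  ->  (X|3) == C1   (C1 & 3 == 3, 4 is a power of 2)
    assert(((uint8_t)(C1 - x) < 4) == ((uint8_t)(x | 3) == C1));
    // C1-X >u 3  ->  (X|3) != C1   (C1 & 3 == 3, 3+1 is a power of 2)
    assert(((uint8_t)(C1 - x) > 3) == ((uint8_t)(x | 3) != C1));
  }
  return 0;
}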
case Instruction::Add:
// Fold: icmp pred (add X, C1), C2
if (!ICI.isEquality()) {
@@ -1464,20 +1582,38 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (ICI.isSigned()) {
if (CR.getLower().isSignBit()) {
return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
- ConstantInt::get(ICI.getContext(),CR.getUpper()));
+ Builder->getInt(CR.getUpper()));
} else if (CR.getUpper().isSignBit()) {
return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
- ConstantInt::get(ICI.getContext(),CR.getLower()));
+ Builder->getInt(CR.getLower()));
}
} else {
if (CR.getLower().isMinValue()) {
return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
- ConstantInt::get(ICI.getContext(),CR.getUpper()));
+ Builder->getInt(CR.getUpper()));
} else if (CR.getUpper().isMinValue()) {
return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
- ConstantInt::get(ICI.getContext(),CR.getLower()));
+ Builder->getInt(CR.getLower()));
}
}
+
+ // X-C1 <u C2 -> (X & -C2) == C1
+ // iff C1 & (C2-1) == 0
+ // C2 is a power of 2
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() &&
+ RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == 0)
+ return new ICmpInst(ICmpInst::ICMP_EQ,
+ Builder->CreateAnd(LHSI->getOperand(0), -RHSV),
+ ConstantExpr::getNeg(LHSC));
+
+ // X-C1 >u C2 -> (X & ~C2) != C1
+ // iff C1 & C2 == 0
+ // C2+1 is a power of 2
+ if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() &&
+ (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == 0)
+ return new ICmpInst(ICmpInst::ICMP_NE,
+ Builder->CreateAnd(LHSI->getOperand(0), ~RHSV),
+ ConstantExpr::getNeg(LHSC));
}
break;
}
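The corresponding range trick for the add case can be checked the same
way, exhaustively at 8 bits with arbitrary aligned constants C1 == 0x20
and C2 == 8 (and C2 == 7 for the second fold); a standalone sketch, not
patch code:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C1 = 0x20;
  for (unsigned X = 0; X < 256; ++X) {
    uint8_t x = (uint8_t)X;
    // X-C1 <u 8  ->  (X & -8) == C1   (C1 & 7 == 0, 8 is a power of 2)
    assert(((uint8_t)(x - C1) < 8) == ((uint8_t)(x & 0xF8) == C1));
    // X-C1 >u 7  ->  (X & ~7) != C1   (C1 & 7 == 0, 7+1 is a power of 2)
    assert(((uint8_t)(x - C1) > 7) == ((uint8_t)(x & 0xF8) != C1));
  }
  return 0;
}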
@@ -1555,9 +1691,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
Constant *NotCI = ConstantExpr::getNot(RHS);
if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
- return ReplaceInstUsesWith(ICI,
- ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
- isICMP_NE));
+ return ReplaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE));
}
break;
@@ -1566,9 +1700,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// If bits are being compared against that are and'd out, then the
// comparison can never succeed!
if ((RHSV & ~BOC->getValue()) != 0)
- return ReplaceInstUsesWith(ICI,
- ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
- isICMP_NE));
+ return ReplaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE));
// If we have ((X & C) == C), turn it into ((X & C) != 0).
if (RHS == BOC && RHSV.isPowerOf2())
@@ -1619,7 +1751,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
case Intrinsic::bswap:
Worklist.Add(II);
ICI.setOperand(0, II->getArgOperand(0));
- ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap()));
+ ICI.setOperand(1, Builder->getInt(RHSV.byteSwap()));
return &ICI;
case Intrinsic::ctlz:
case Intrinsic::cttz:
@@ -1661,8 +1793,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
- TD->getPointerSizeInBits() ==
- cast<IntegerType>(DestTy)->getBitWidth()) {
+ TD->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
Value *RHSOp = 0;
if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
@@ -1915,14 +2046,59 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
}
+/// \brief Check if the order of \p Op0 and \p Op1 as operands in an ICmpInst
+/// should be swapped.
+/// The decision is based on how many times these two operands are reused
+/// as subtract operands and their positions in those instructions.
+/// The rationale is that several architectures use the same instruction for
+/// both subtract and cmp, so it is better if the order of those operands
+/// matches.
+/// \return true if Op0 and Op1 should be swapped.
+static bool swapMayExposeCSEOpportunities(const Value *Op0,
+                                          const Value *Op1) {
+ // Filter out pointer values, as those cannot appear directly in a subtract.
+ // FIXME: we may want to go through inttoptrs or bitcasts.
+ if (Op0->getType()->isPointerTy())
+ return false;
+ // Count every use of both Op0 and Op1 in a subtract.
+ // Each time Op0 is the first operand, count -1: swapping is bad, the
+ // subtract already has the same layout as the compare.
+ // Each time Op0 is the second operand, count +1: swapping is good, the
+ // subtract has a different layout than the compare.
+ // At the end, if the benefit is greater than 0, Op0 should come second to
+ // expose more CSE opportunities.
+ int GlobalSwapBenefits = 0;
+ for (Value::const_use_iterator UI = Op0->use_begin(),
+                                UIEnd = Op0->use_end();
+      UI != UIEnd; ++UI) {
+ const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(*UI);
+ if (!BinOp || BinOp->getOpcode() != Instruction::Sub)
+ continue;
+ // If Op0 is the first argument, it is not beneficial to swap the
+ // arguments.
+ int LocalSwapBenefits = -1;
+ unsigned Op1Idx = 1;
+ if (BinOp->getOperand(Op1Idx) == Op0) {
+ Op1Idx = 0;
+ LocalSwapBenefits = 1;
+ }
+ if (BinOp->getOperand(Op1Idx) != Op1)
+ continue;
+ GlobalSwapBenefits += LocalSwapBenefits;
+ }
+ return GlobalSwapBenefits > 0;
+}
+
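The same counting idea, sketched outside of LLVM with (LHS, RHS) pairs
standing in for the 'sub' instructions that use Op0; all names here are
illustrative and not part of the patch:

struct SubUse { const void *LHS, *RHS; };

static bool shouldSwapCmpOperands(const void *Op0, const void *Op1,
                                  const SubUse *Uses, unsigned NumUses) {
  int Benefit = 0;
  for (unsigned i = 0; i != NumUses; ++i) {
    if (Uses[i].LHS == Op0 && Uses[i].RHS == Op1)
      --Benefit; // 'sub Op0, Op1' already matches the compare's order.
    else if (Uses[i].LHS == Op1 && Uses[i].RHS == Op0)
      ++Benefit; // 'sub Op1, Op0' would match the swapped compare.
  }
  return Benefit > 0;
}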
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ unsigned Op0Cplxity = getComplexity(Op0);
+ unsigned Op1Cplxity = getComplexity(Op1);
/// Orders the operands of the compare so that they are listed from most
/// complex to least complex. This puts constants before unary operators,
/// before binary operators.
- if (getComplexity(Op0) < getComplexity(Op1)) {
+ if (Op0Cplxity < Op1Cplxity ||
+ (Op0Cplxity == Op1Cplxity &&
+ swapMayExposeCSEOpportunities(Op0, Op1))) {
I.swapOperands();
std::swap(Op0, Op1);
Changed = true;
@@ -2041,19 +2217,19 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
case ICmpInst::ICMP_ULE:
assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE
return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ Builder->getInt(CI->getValue()+1));
case ICmpInst::ICMP_SLE:
assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE
return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ Builder->getInt(CI->getValue()+1));
case ICmpInst::ICMP_UGE:
assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ Builder->getInt(CI->getValue()-1));
case ICmpInst::ICMP_SGE:
assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ Builder->getInt(CI->getValue()-1));
}
// If this comparison is a normal comparison, it demands all
@@ -2192,7 +2368,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C
return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ Builder->getInt(CI->getValue()-1));
// (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
if (CI->isMinValue(true))
@@ -2211,7 +2387,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(a)-1 == C
return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ Builder->getInt(CI->getValue()+1));
// (x >u 2147483647) -> (x <s 0) -> true if sign bit set
if (CI->isMaxValue(true))
@@ -2229,7 +2405,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C
return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ Builder->getInt(CI->getValue()-1));
}
break;
case ICmpInst::ICMP_SGT:
@@ -2243,7 +2419,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C
return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ Builder->getInt(CI->getValue()+1));
}
break;
case ICmpInst::ICMP_SGE:
@@ -2357,7 +2533,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
if (RHSC->isNullValue() && TD &&
- TD->getIntPtrType(RHSC->getContext()) ==
+ TD->getIntPtrType(RHSC->getType()) ==
LHSI->getOperand(0)->getType())
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
Constant::getNullValue(LHSI->getOperand(0)->getType()));
@@ -2719,8 +2895,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
ConstantInt *C1, *C2;
if (match(B, m_ConstantInt(C1)) &&
match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
- Constant *NC = ConstantInt::get(I.getContext(),
- C1->getValue() ^ C2->getValue());
+ Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue());
Value *Xor = Builder->CreateXor(C, NC);
return new ICmpInst(I.getPredicate(), A, Xor);
}
@@ -2781,6 +2956,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Builder->CreateTrunc(B, A->getType()));
}
+ // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
+ // For lshr and ashr pairs.
+ if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
+ (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
+ unsigned TypeBits = Cst1->getBitWidth();
+ unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+ if (ShAmt < TypeBits && ShAmt != 0) {
+ ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE
+ ? ICmpInst::ICMP_UGE
+ : ICmpInst::ICMP_ULT;
+ Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
+ return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal));
+ }
+ }
+
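The shifted-equality fold is another identity that an exhaustive 8-bit
check confirms quickly: two right-shifted values are equal exactly when
the operands agree on the bits that survive the shift. C == 3 is an
arbitrary in-range shift amount (illustration only):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned C = 3;
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      // (A >> C) == (B >> C)  <->  (A ^ B) <u (1 << C)
      bool EQ = (A >> C) == (B >> C);
      assert(EQ == ((uint8_t)(A ^ B) < (1u << C)));
    }
  return 0;
}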
// Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
// "icmp (and X, mask), cst"
uint64_t ShAmt = 0;
@@ -2811,20 +3004,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Value *X; ConstantInt *Cst;
// icmp X+Cst, X
if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
- return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0);
+ return FoldICmpAddOpCst(I, X, Cst, I.getPredicate());
// icmp X, X+Cst
if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
- return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1);
+ return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate());
}
return Changed ? &I : 0;
}
-
-
-
-
-
/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
///
Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
@@ -2885,9 +3073,9 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
Pred = ICmpInst::ICMP_NE;
break;
case FCmpInst::FCMP_ORD:
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
case FCmpInst::FCMP_UNO:
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getFalse());
}
IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
@@ -2901,50 +3089,50 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
if (!LHSUnsigned) {
// If the RHS value is > SignedMax, fold the comparison. This handles +INF
// and large values.
- APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false);
+ APFloat SMax(RHS.getSemantics());
SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
APFloat::rmNearestTiesToEven);
if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
Pred == ICmpInst::ICMP_SLE)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
+ return ReplaceInstUsesWith(I, Builder->getFalse());
}
} else {
// If the RHS value is > UnsignedMax, fold the comparison. This handles
// +INF and large values.
- APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false);
+ APFloat UMax(RHS.getSemantics());
UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
APFloat::rmNearestTiesToEven);
if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
Pred == ICmpInst::ICMP_ULE)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
+ return ReplaceInstUsesWith(I, Builder->getFalse());
}
}
if (!LHSUnsigned) {
// See if the RHS value is < SignedMin.
- APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+ APFloat SMin(RHS.getSemantics());
SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
APFloat::rmNearestTiesToEven);
if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
Pred == ICmpInst::ICMP_SGE)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
+ return ReplaceInstUsesWith(I, Builder->getFalse());
}
} else {
// See if the RHS value is < UnsignedMin.
- APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+ APFloat SMin(RHS.getSemantics());
SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true,
APFloat::rmNearestTiesToEven);
if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT ||
Pred == ICmpInst::ICMP_UGE)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
+ return ReplaceInstUsesWith(I, Builder->getFalse());
}
}
@@ -2966,14 +3154,14 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
switch (Pred) {
default: llvm_unreachable("Unexpected integer comparison!");
case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getFalse());
case ICmpInst::ICMP_ULE:
// (float)int <= 4.4 --> int <= 4
// (float)int <= -4.4 --> false
if (RHS.isNegative())
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getFalse());
break;
case ICmpInst::ICMP_SLE:
// (float)int <= 4.4 --> int <= 4
@@ -2985,7 +3173,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// (float)int < -4.4 --> false
// (float)int < 4.4 --> int <= 4
if (RHS.isNegative())
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getFalse());
Pred = ICmpInst::ICMP_ULE;
break;
case ICmpInst::ICMP_SLT:
@@ -2998,7 +3186,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// (float)int > 4.4 --> int > 4
// (float)int > -4.4 --> true
if (RHS.isNegative())
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
break;
case ICmpInst::ICMP_SGT:
// (float)int > 4.4 --> int > 4
@@ -3010,7 +3198,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// (float)int >= -4.4 --> true
// (float)int >= 4.4 --> int > 4
if (RHS.isNegative())
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
Pred = ICmpInst::ICMP_UGT;
break;
case ICmpInst::ICMP_SGE:
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index e2d7966..4c861b3 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -154,7 +154,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
// any casting is exposed early.
if (TD) {
- Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
+ Type *IntPtrTy = TD->getIntPtrType(AI.getType());
if (AI.getArraySize()->getType() != IntPtrTy) {
Value *V = Builder->CreateIntCast(AI.getArraySize(),
IntPtrTy, false);
@@ -180,12 +180,13 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Now that I is pointing to the first non-allocation-inst in the block,
// insert our getelementptr instruction...
//
- Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
- Value *Idx[2];
- Idx[0] = NullIdx;
- Idx[1] = NullIdx;
+ Type *IdxTy = TD
+ ? TD->getIntPtrType(AI.getType())
+ : Type::getInt64Ty(AI.getContext());
+ Value *NullIdx = Constant::getNullValue(IdxTy);
+ Value *Idx[2] = { NullIdx, NullIdx };
Instruction *GEP =
- GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub");
+ GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
InsertNewInstBefore(GEP, *It);
// Now make everything use the getelementptr instead of the original
@@ -262,9 +263,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
EraseInstFromFunction(*ToDelete[i]);
Constant *TheSrc = cast<Constant>(Copy->getSource());
- Instruction *NewI
- = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
- AI.getType()));
+ Constant *Cast
+ = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
+ Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
EraseInstFromFunction(*Copy);
++NumGlobalCopies;
return NewI;
@@ -302,9 +303,11 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
if (Constant *CSrc = dyn_cast<Constant>(CastOp))
if (ASrcTy->getNumElements() != 0) {
- Value *Idxs[2];
- Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
- Idxs[1] = Idxs[0];
+ Type *IdxTy = TD
+ ? TD->getIntPtrType(SrcTy)
+ : Type::getInt64Ty(SrcTy->getContext());
+ Value *Idx = Constant::getNullValue(IdxTy);
+ Value *Idxs[2] = { Idx, Idx };
CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
SrcTy = cast<PointerType>(CastOp->getType());
SrcPTy = SrcTy->getElementType();
@@ -315,7 +318,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
SrcPTy->isVectorTy()) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
- (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
+ (SrcPTy->isPtrOrPtrVectorTy() ==
+ LI.getType()->isPtrOrPtrVectorTy()) &&
IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index ecc9fc3..a759548 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -95,6 +95,25 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
return MulExt.slt(Min) || MulExt.sgt(Max);
}
+/// \brief A helper routine of InstCombiner::visitMul().
+///
+/// If C is a vector of known powers of 2, then this function returns
+/// a new vector obtained from C replacing each element with its logBase2.
+/// Return a null pointer otherwise.
+static Constant *getLogBase2Vector(ConstantDataVector *CV) {
+ const APInt *IVal;
+ SmallVector<Constant *, 4> Elts;
+
+ for (unsigned I = 0, E = CV->getNumElements(); I != E; ++I) {
+ Constant *Elt = CV->getElementAsConstant(I);
+ if (!match(Elt, m_APInt(IVal)) || !IVal->isPowerOf2())
+ return 0;
+ Elts.push_back(ConstantInt::get(Elt->getType(), IVal->logBase2()));
+ }
+
+ return ConstantVector::get(Elts);
+}
+
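The element-wise computation is simple enough to mirror on a plain
array; a hypothetical standalone analog of the helper (names and types
are illustrative, not patch code):

#include <cstdint>

static bool logBase2Vector(const uint32_t *In, uint32_t *Out, unsigned N) {
  for (unsigned I = 0; I != N; ++I) {
    uint32_t V = In[I];
    if (V == 0 || (V & (V - 1)) != 0)
      return false; // some element is not a power of 2
    uint32_t Log = 0;
    while ((V >>= 1) != 0)
      ++Log;
    Out[I] = Log; // logBase2 of the original element
  }
  return true;
}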
Instruction *InstCombiner::visitMul(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -108,24 +127,37 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(Op1, m_AllOnes())) // X * -1 == 0 - X
return BinaryOperator::CreateNeg(Op0, I.getName());
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-
- // ((X << C1)*C2) == (X * (C2 << C1))
- if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
- if (SI->getOpcode() == Instruction::Shl)
- if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
- return BinaryOperator::CreateMul(SI->getOperand(0),
- ConstantExpr::getShl(CI, ShOp));
-
- const APInt &Val = CI->getValue();
- if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
- Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
- BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst);
- if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
- if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
- return Shl;
+ // Also allow combining multiply instructions on vectors.
+ {
+ Value *NewOp;
+ Constant *C1, *C2;
+ const APInt *IVal;
+ if (match(&I, m_Mul(m_Shl(m_Value(NewOp), m_Constant(C2)),
+ m_Constant(C1))) &&
+ match(C1, m_APInt(IVal)))
+ // ((X << C1)*C2) == (X * (C2 << C1))
+ return BinaryOperator::CreateMul(NewOp, ConstantExpr::getShl(C1, C2));
+
+ if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
+ Constant *NewCst = 0;
+ if (match(C1, m_APInt(IVal)) && IVal->isPowerOf2())
+ // Replace X*(2^C) with X << C, where C is either a scalar or a splat.
+ NewCst = ConstantInt::get(NewOp->getType(), IVal->logBase2());
+ else if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(C1))
+ // Replace X*(2^C) with X << C, where C is a vector of known
+ // constant powers of 2.
+ NewCst = getLogBase2Vector(CV);
+
+ if (NewCst) {
+ BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
+ if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
+ if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
+ return Shl;
+ }
}
+ }
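The strength reduction is exact even in the presence of wrapping, since
multiplication and shifting agree modulo 2^n; for instance, exhaustively
at 8 bits with the arbitrary power of 2 factor 8:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    assert((uint8_t)(X * 8) == (uint8_t)(X << 3)); // X * 2^3 == X << 3
  return 0;
}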
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
// Canonicalize (X+C1)*CI -> X*CI+C1*CI.
{ Value *X; ConstantInt *C1;
if (Op0->hasOneUse() &&
@@ -306,13 +338,13 @@ static bool isFMulOrFDivWithConstant(Value *V) {
if (C0 && C1)
return false;
- return (C0 && C0->getValueAPF().isNormal()) ||
- (C1 && C1->getValueAPF().isNormal());
+ return (C0 && C0->getValueAPF().isFiniteNonZero()) ||
+ (C1 && C1->getValueAPF().isFiniteNonZero());
}
static bool isNormalFp(const ConstantFP *C) {
const APFloat &Flt = C->getValueAPF();
- return Flt.isNormal() && !Flt.isDenormal();
+ return Flt.isNormal();
}
/// foldFMulConst() is a helper routine of InstCombiner::visitFMul().
@@ -342,9 +374,12 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
} else {
if (C0) {
// (C0 / X) * C => (C0 * C) / X
- ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
- if (isNormalFp(F))
- R = BinaryOperator::CreateFDiv(F, Opnd1);
+ if (FMulOrDiv->hasOneUse()) {
+ // It would otherwise introduce another div.
+ ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
+ if (isNormalFp(F))
+ R = BinaryOperator::CreateFDiv(F, Opnd1);
+ }
} else {
// (X / C1) * C => X * (C/C1) if C/C1 is not a denormal
ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFDiv(C, C1));
@@ -391,7 +426,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
return NV;
ConstantFP *C = dyn_cast<ConstantFP>(Op1);
- if (C && AllowReassociate && C->getValueAPF().isNormal()) {
+ if (C && AllowReassociate && C->getValueAPF().isFiniteNonZero()) {
// Let MDC denote an expression in one of these forms:
// X * C, C/X, X/C, where C is a constant.
//
@@ -418,7 +453,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
Swap = true;
}
- if (C1 && C1->getValueAPF().isNormal() &&
+ if (C1 && C1->getValueAPF().isFiniteNonZero() &&
isFMulOrFDivWithConstant(Opnd0)) {
Value *M1 = ConstantExpr::getFMul(C1, C);
Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ?
@@ -428,10 +463,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (Swap && FAddSub->getOpcode() == Instruction::FSub)
std::swap(M0, M1);
- Value *R = (FAddSub->getOpcode() == Instruction::FAdd) ?
- BinaryOperator::CreateFAdd(M0, M1) :
- BinaryOperator::CreateFSub(M0, M1);
- Instruction *RI = cast<Instruction>(R);
+ Instruction *RI = (FAddSub->getOpcode() == Instruction::FAdd)
+ ? BinaryOperator::CreateFAdd(M0, M1)
+ : BinaryOperator::CreateFSub(M0, M1);
RI->copyFastMathFlags(&I);
return RI;
}
@@ -458,13 +492,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
// if pattern detected emit alternate sequence
if (OpX && OpY) {
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->SetFastMathFlags(Log2->getFastMathFlags());
Log2->setArgOperand(0, OpY);
Value *FMulVal = Builder->CreateFMul(OpX, Log2);
- Instruction *FMul = cast<Instruction>(FMulVal);
- FMul->copyFastMathFlags(Log2);
- Instruction *FSub = BinaryOperator::CreateFSub(FMulVal, OpX);
- FSub->copyFastMathFlags(Log2);
- return FSub;
+ Value *FSub = Builder->CreateFSub(FMulVal, OpX);
+ FSub->takeName(&I);
+ return ReplaceInstUsesWith(I, FSub);
}
}
@@ -474,6 +508,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
for (int i = 0; i < 2; i++) {
bool IgnoreZeroSign = I.hasNoSignedZeros();
if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->SetFastMathFlags(I.getFastMathFlags());
+
Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
@@ -484,13 +521,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (Opnd0->hasOneUse()) {
// -X * Y => -(X*Y) (Promote negation as high as possible)
Value *T = Builder->CreateFMul(N0, Opnd1);
- cast<Instruction>(T)->setDebugLoc(I.getDebugLoc());
- Instruction *Neg = BinaryOperator::CreateFNeg(T);
- if (I.getFastMathFlags().any()) {
- cast<Instruction>(T)->copyFastMathFlags(&I);
- Neg->copyFastMathFlags(&I);
- }
- return Neg;
+ Value *Neg = Builder->CreateFNeg(T);
+ Neg->takeName(&I);
+ return ReplaceInstUsesWith(I, Neg);
}
}
@@ -513,13 +546,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
Y = Opnd0_0;
if (Y) {
- Instruction *T = cast<Instruction>(Builder->CreateFMul(Opnd1, Opnd1));
- T->copyFastMathFlags(&I);
- T->setDebugLoc(I.getDebugLoc());
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->SetFastMathFlags(I.getFastMathFlags());
+ Value *T = Builder->CreateFMul(Opnd1, Opnd1);
- Instruction *R = BinaryOperator::CreateFMul(T, Y);
- R->copyFastMathFlags(&I);
- return R;
+ Value *R = Builder->CreateFMul(T, Y);
+ R->takeName(&I);
+ return ReplaceInstUsesWith(I, R);
}
}
}
@@ -528,10 +561,10 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
Value *LHS = Op0, *RHS = Op1;
Value *B, *C;
- if (!match(RHS, m_UIToFp(m_Value(C))))
+ if (!match(RHS, m_UIToFP(m_Value(C))))
std::swap(LHS, RHS);
- if (match(RHS, m_UIToFp(m_Value(C))) && C->getType()->isIntegerTy(1)) {
+ if (match(RHS, m_UIToFP(m_Value(C))) && C->getType()->isIntegerTy(1)) {
B = LHS;
Value *Zero = ConstantFP::getNegativeZero(B->getType());
return SelectInst::Create(C, B, Zero);
@@ -542,10 +575,10 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
Value *LHS = Op0, *RHS = Op1;
Value *A, *C;
- if (!match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))))
+ if (!match(RHS, m_FSub(m_FPOne(), m_UIToFP(m_Value(C)))))
std::swap(LHS, RHS);
- if (match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))) &&
+ if (match(RHS, m_FSub(m_FPOne(), m_UIToFP(m_Value(C)))) &&
C->getType()->isIntegerTy(1)) {
A = LHS;
Value *Zero = ConstantFP::getNegativeZero(A->getType());
@@ -613,8 +646,7 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
*I = SI->getOperand(NonNullOperand);
Worklist.Add(BBI);
} else if (*I == SelectCond) {
- *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) :
- ConstantInt::getFalse(BBI->getContext());
+ *I = Builder->getInt1(NonNullOperand == 1);
Worklist.Add(BBI);
}
}
@@ -703,40 +735,124 @@ static Value *dyn_castZExtVal(Value *V, Type *Ty) {
return 0;
}
-Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+namespace {
+const unsigned MaxDepth = 6;
+typedef Instruction *(*FoldUDivOperandCb)(Value *Op0, Value *Op1,
+ const BinaryOperator &I,
+ InstCombiner &IC);
+
+/// \brief Used to maintain state for visitUDivOperand().
+struct UDivFoldAction {
+ FoldUDivOperandCb FoldAction; ///< Informs visitUDiv() how to fold this
+ ///< operand. This can be zero if this action
+ ///< joins two actions together.
+
+ Value *OperandToFold; ///< Which operand to fold.
+ union {
+ Instruction *FoldResult; ///< The instruction returned when FoldAction is
+ ///< invoked.
+
+ size_t SelectLHSIdx; ///< Stores the LHS action index if this action
+ ///< joins two actions together.
+ };
+
+ UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand)
+ : FoldAction(FA), OperandToFold(InputOperand), FoldResult(0) {}
+ UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand, size_t SLHS)
+ : FoldAction(FA), OperandToFold(InputOperand), SelectLHSIdx(SLHS) {}
+};
+}
- if (Value *V = SimplifyUDivInst(Op0, Op1, TD))
- return ReplaceInstUsesWith(I, V);
+// X udiv 2^C -> X >> C
+static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1,
+ const BinaryOperator &I, InstCombiner &IC) {
+ const APInt &C = cast<Constant>(Op1)->getUniqueInteger();
+ BinaryOperator *LShr = BinaryOperator::CreateLShr(
+ Op0, ConstantInt::get(Op0->getType(), C.logBase2()));
+ if (I.isExact()) LShr->setIsExact();
+ return LShr;
+}
- // Handle the integer div common cases
- if (Instruction *Common = commonIDivTransforms(I))
- return Common;
+// X udiv C, where C >= signbit
+static Instruction *foldUDivNegCst(Value *Op0, Value *Op1,
+ const BinaryOperator &I, InstCombiner &IC) {
+ Value *ICI = IC.Builder->CreateICmpULT(Op0, cast<ConstantInt>(Op1));
- {
- // X udiv 2^C -> X >> C
- // Check to see if this is an unsigned division with an exact power of 2,
- // if so, convert to a right shift.
- const APInt *C;
- if (match(Op1, m_Power2(C))) {
- BinaryOperator *LShr =
- BinaryOperator::CreateLShr(Op0,
- ConstantInt::get(Op0->getType(),
- C->logBase2()));
- if (I.isExact()) LShr->setIsExact();
- return LShr;
- }
+ return SelectInst::Create(ICI, Constant::getNullValue(I.getType()),
+ ConstantInt::get(I.getType(), 1));
+}
+
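When the divisor has its sign bit set, the quotient can only be 0 or 1,
which is exactly what the select encodes; an exhaustive 8-bit check with
the arbitrary constant C == 200 (illustration only):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 200; // sign bit set: 200 >= 128
  for (unsigned X = 0; X < 256; ++X)
    assert((uint8_t)(X / C) == ((X < C) ? 0 : 1));
  return 0;
}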
+// X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
+static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I,
+ InstCombiner &IC) {
+ Instruction *ShiftLeft = cast<Instruction>(Op1);
+ if (isa<ZExtInst>(ShiftLeft))
+ ShiftLeft = cast<Instruction>(ShiftLeft->getOperand(0));
+
+ const APInt &CI =
+ cast<Constant>(ShiftLeft->getOperand(0))->getUniqueInteger();
+ Value *N = ShiftLeft->getOperand(1);
+ if (CI != 1)
+ N = IC.Builder->CreateAdd(N, ConstantInt::get(N->getType(), CI.logBase2()));
+ if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1))
+ N = IC.Builder->CreateZExt(N, Z->getDestTy());
+ BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N);
+ if (I.isExact()) LShr->setIsExact();
+ return LShr;
+}
+
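A quick numeric check of the shift-of-power-of-2 divisor case, with the
arbitrary choices C1 == 2 (so C2 == 1) and N == 3, giving a divisor of
2 << 3 == 16; a sketch, not patch code:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned N = 3, C2 = 1; // C1 == 1 << C2 == 2
  for (uint32_t X = 0; X < 100000; ++X)
    assert(X / (2u << N) == X >> (N + C2)); // X udiv (C1<<N) == X >> (N+C2)
  return 0;
}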
+/// \brief Recursively visits the possible right hand operands of a udiv
+/// instruction, seeing through select instructions, to determine if we can
+/// replace the udiv with something simpler. If we find that an operand is not
+/// able to simplify the udiv, we abort the entire transformation.
+static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I,
+ SmallVectorImpl<UDivFoldAction> &Actions,
+ unsigned Depth = 0) {
+ // Check to see if this is an unsigned division with an exact power of 2,
+ // if so, convert to a right shift.
+ if (match(Op1, m_Power2())) {
+ Actions.push_back(UDivFoldAction(foldUDivPow2Cst, Op1));
+ return Actions.size();
}
- if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op1))
// X udiv C, where C >= signbit
if (C->getValue().isNegative()) {
- Value *IC = Builder->CreateICmpULT(Op0, C);
- return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
- ConstantInt::get(I.getType(), 1));
+ Actions.push_back(UDivFoldAction(foldUDivNegCst, C));
+ return Actions.size();
}
+
+ // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
+ if (match(Op1, m_Shl(m_Power2(), m_Value())) ||
+ match(Op1, m_ZExt(m_Shl(m_Power2(), m_Value())))) {
+ Actions.push_back(UDivFoldAction(foldUDivShl, Op1));
+ return Actions.size();
}
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ == MaxDepth)
+ return 0;
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (size_t LHSIdx = visitUDivOperand(Op0, SI->getOperand(1), I, Actions))
+ if (visitUDivOperand(Op0, SI->getOperand(2), I, Actions)) {
+ Actions.push_back(UDivFoldAction((FoldUDivOperandCb)0, Op1, LHSIdx-1));
+ return Actions.size();
+ }
+
+ return 0;
+}
+
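For a select of two powers of 2 this machinery nets out to a select of
two shifts; the end-to-end effect can be checked exhaustively at 8 bits
(8 and 16 are arbitrary powers of 2, illustration only):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (int Cond = 0; Cond < 2; ++Cond)
      // X udiv (Cond ? 8 : 16)  ->  Cond ? (X >> 3) : (X >> 4)
      assert(X / (Cond ? 8u : 16u) == (Cond ? X >> 3 : X >> 4));
  return 0;
}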
+Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyUDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
// (x lshr C1) udiv C2 --> x udiv (C2 << C1)
if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) {
Value *X;
@@ -747,38 +863,6 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
}
}
- // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
- { const APInt *CI; Value *N;
- if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) ||
- match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) {
- if (*CI != 1)
- N = Builder->CreateAdd(N,
- ConstantInt::get(N->getType(), CI->logBase2()));
- if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1))
- N = Builder->CreateZExt(N, Z->getDestTy());
- if (I.isExact())
- return BinaryOperator::CreateExactLShr(Op0, N);
- return BinaryOperator::CreateLShr(Op0, N);
- }
- }
-
- // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
- // where C1&C2 are powers of two.
- { Value *Cond; const APInt *C1, *C2;
- if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
- // Construct the "on true" case of the select
- Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
- I.isExact());
-
- // Construct the "on false" case of the select
- Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
- I.isExact());
-
- // construct the select instruction and return it.
- return SelectInst::Create(Cond, TSI, FSI);
- }
- }
-
// (zext A) udiv (zext B) --> zext (A udiv B)
if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
@@ -786,6 +870,37 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
I.isExact()),
I.getType());
+ // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...))))
+ SmallVector<UDivFoldAction, 6> UDivActions;
+ if (visitUDivOperand(Op0, Op1, I, UDivActions))
+ for (unsigned i = 0, e = UDivActions.size(); i != e; ++i) {
+ FoldUDivOperandCb Action = UDivActions[i].FoldAction;
+ Value *ActionOp1 = UDivActions[i].OperandToFold;
+ Instruction *Inst;
+ if (Action)
+ Inst = Action(Op0, ActionOp1, I, *this);
+ else {
+ // This action joins two actions together. The RHS of this action is
+ // simply the last action we processed; we saved the LHS action index in
+ // the joining action.
+ size_t SelectRHSIdx = i - 1;
+ Value *SelectRHS = UDivActions[SelectRHSIdx].FoldResult;
+ size_t SelectLHSIdx = UDivActions[i].SelectLHSIdx;
+ Value *SelectLHS = UDivActions[SelectLHSIdx].FoldResult;
+ Inst = SelectInst::Create(cast<SelectInst>(ActionOp1)->getCondition(),
+ SelectLHS, SelectRHS);
+ }
+
+ // If this is the last action to process, return it to the InstCombiner.
+ // Otherwise, we insert it before the UDiv and record it so that we may
+ // use it as part of a joining action (i.e., a SelectInst).
+ if (e - i != 1) {
+ Inst->insertBefore(&I);
+ UDivActions[i].FoldResult = Inst;
+ } else
+ return Inst;
+ }
+
return 0;
}
@@ -846,7 +961,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
/// FP value and:
/// 1) 1/C is exact, or
/// 2) reciprocal is allowed.
-/// If the convertion was successful, the simplified expression "X * 1/C" is
+/// If the conversion was successful, the simplified expression "X * 1/C" is
/// returned; otherwise, NULL is returned.
///
static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
@@ -856,7 +971,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
APFloat Reciprocal(FpVal.getSemantics());
bool Cvt = FpVal.getExactInverse(&Reciprocal);
- if (!Cvt && AllowReciprocal && FpVal.isNormal()) {
+ if (!Cvt && AllowReciprocal && FpVal.isFiniteNonZero()) {
Reciprocal = APFloat(FpVal.getSemantics(), 1.0f);
(void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven);
Cvt = !Reciprocal.isDenormal();
@@ -876,10 +991,19 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
if (Value *V = SimplifyFDivInst(Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
+ if (isa<Constant>(Op0))
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
bool AllowReassociate = I.hasUnsafeAlgebra();
bool AllowReciprocal = I.hasAllowReciprocal();
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
if (AllowReassociate) {
ConstantFP *C1 = 0;
ConstantFP *C2 = Op1C;
@@ -891,14 +1015,14 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
//
Constant *C = ConstantExpr::getFDiv(C1, C2);
const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
- if (F.isNormal() && !F.isDenormal())
+ if (F.isNormal())
Res = BinaryOperator::CreateFMul(X, C);
} else if (match(Op0, m_FDiv(m_Value(X), m_ConstantFP(C1)))) {
// (X/C1)/C2 => X /(C2*C1) [=> X * 1/(C2*C1) if reciprocal is allowed]
//
Constant *C = ConstantExpr::getFMul(C1, C2);
const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
- if (F.isNormal() && !F.isDenormal()) {
+ if (F.isNormal()) {
Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
AllowReciprocal);
if (!Res)
@@ -939,7 +1063,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
if (Fold) {
const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF();
- if (FoldC.isNormal() && !FoldC.isDenormal()) {
+ if (FoldC.isNormal()) {
Instruction *R = CreateDiv ?
BinaryOperator::CreateFDiv(Fold, X) :
BinaryOperator::CreateFMul(X, Fold);
@@ -1027,37 +1151,26 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
if (Instruction *common = commonIRemTransforms(I))
return common;
- // X urem C^2 -> X and C-1
- { const APInt *C;
- if (match(Op1, m_Power2(C)))
- return BinaryOperator::CreateAnd(Op0,
- ConstantInt::get(I.getType(), *C-1));
- }
+ // (zext A) urem (zext B) --> zext (A urem B)
+ if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
+ if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
+ return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1),
+ I.getType());
- // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
- if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
+ // X urem Y -> X and Y-1, where Y is a power of 2 (or zero)
+ if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true)) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
Value *Add = Builder->CreateAdd(Op1, N1);
return BinaryOperator::CreateAnd(Op0, Add);
}
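The add of all-ones computes Y-1 with wrapping, so the fold is the usual
power-of-2 mask trick; exhaustively at 8 bits with the arbitrary power
of 2 Y == 16:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t Y = 16;
  for (unsigned X = 0; X < 256; ++X)
    // X urem Y  ->  X & (Y + 0xFF), i.e. X & (Y - 1)
    assert((uint8_t)(X % Y) == (X & (uint8_t)(Y + 0xFF)));
  return 0;
}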
- // urem X, (select Cond, 2^C1, 2^C2) -->
- // select Cond, (and X, C1-1), (and X, C2-1)
- // when C1&C2 are powers of two.
- { Value *Cond; const APInt *C1, *C2;
- if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
- Value *TrueAnd = Builder->CreateAnd(Op0, *C1-1, Op1->getName()+".t");
- Value *FalseAnd = Builder->CreateAnd(Op0, *C2-1, Op1->getName()+".f");
- return SelectInst::Create(Cond, TrueAnd, FalseAnd);
- }
+ // 1 urem X -> zext(X != 1)
+ if (match(Op0, m_One())) {
+ Value *Cmp = Builder->CreateICmpNE(Op1, Op0);
+ Value *Ext = Builder->CreateZExt(Cmp, I.getType());
+ return ReplaceInstUsesWith(I, Ext);
}
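Since 1 urem X is 0 exactly when X == 1 and 1 for any larger X (X == 0
is undefined for urem), the zext'd compare reproduces it; checked for
every non-zero 8-bit X:

#include <cassert>

int main() {
  for (unsigned X = 1; X < 256; ++X)
    assert((1u % X) == (unsigned)(X != 1)); // 1 urem X -> zext(X != 1)
  return 0;
}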
- // (zext A) urem (zext B) --> zext (A urem B)
- if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
- if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
- return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1),
- I.getType());
-
return 0;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index bd14e81..4c6d0c4 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -604,8 +604,6 @@ namespace llvm {
LHS.Width == RHS.Width;
}
};
- template <>
- struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
}
@@ -688,10 +686,10 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// extracted out of it. First, sort the users by their offset and size.
array_pod_sort(PHIUsers.begin(), PHIUsers.end());
- DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
- for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
- errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
- );
+ DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n';
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ dbgs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n';
+ );
// PredValues - This is a temporary used when rewriting PHI nodes. It is
// hoisted out here to avoid construction/destruction thrashing.
@@ -772,7 +770,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
}
PredValues.clear();
- DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
+ DEBUG(dbgs() << " Made element PHI for offset " << Offset << ": "
<< *EltPHI << '\n');
ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
}
@@ -792,7 +790,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// PHINode simplification
//
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
- if (Value *V = SimplifyInstruction(&PN, TD))
+ if (Value *V = SimplifyInstruction(&PN, TD, TLI))
return ReplaceInstUsesWith(PN, V);
// If all PHI operands are the same operation, pull them through the PHI,
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 59502fb..283bec2 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -367,7 +367,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
Value *FalseVal,
InstCombiner::BuilderTy *Builder) {
const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
- if (!IC || !IC->isEquality())
+ if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
return 0;
Value *CmpLHS = IC->getOperand(0);
@@ -662,7 +662,7 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
ConstantInt *FalseVal,
InstCombiner::BuilderTy *Builder) {
const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
- if (!IC || !IC->isEquality())
+ if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
return 0;
if (!match(IC->getOperand(1), m_Zero()))
@@ -670,8 +670,7 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
ConstantInt *AndRHS;
Value *LHS = IC->getOperand(0);
- if (LHS->getType() != SI.getType() ||
- !match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
+ if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
return 0;
// If both select arms are non-zero see if we have a select of the form
@@ -705,7 +704,13 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
unsigned ValZeros = ValC->getValue().logBase2();
unsigned AndZeros = AndRHS->getValue().logBase2();
- Value *V = LHS;
+ // If types don't match we can still convert the select by introducing a zext
+ // or a trunc of the 'and'. The trunc case requires that all of the truncated
+ // bits are zero; we can figure that out by looking at the 'and' mask.
+ if (AndZeros >= ValC->getBitWidth())
+ return 0;
+
+ Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType());
if (ValZeros > AndZeros)
V = Builder->CreateShl(V, ValZeros - AndZeros);
else if (ValZeros < AndZeros)
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 60d672b..c831ddd 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -754,7 +754,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
- KnownZero |= LHSKnownZero;
+ KnownZero.setBit(KnownZero.getBitWidth() - 1);
}
break;
case Instruction::URem: {
@@ -808,7 +808,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// TODO: Could compute known zero/one bits based on the input.
break;
}
- case Intrinsic::x86_sse42_crc32_64_8:
case Intrinsic::x86_sse42_crc32_64_64:
KnownZero = APInt::getHighBitsSet(64, 32);
return 0;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 4301ddb..1e72410 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -106,8 +106,8 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
}
// If we have a PHI node with a vector type that has only 2 uses: feed
-// itself and be an operand of extractelemnt at a constant location,
-// try to replace the PHI of the vector type with a PHI of a scalar type
+// itself and be an operand of extractelement at a constant location,
+// try to replace the PHI of the vector type with a PHI of a scalar type.
Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
if (!PN->hasNUses(2))
@@ -125,17 +125,15 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// and that it is a binary operation which is cheap to scalarize.
// otherwise return NULL.
if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) ||
- !(isa<BinaryOperator>(PHIUser)) ||
- !CheapToScalarize(PHIUser, true))
+ !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
return NULL;
// Create a scalar PHI node that will replace the vector PHI node
// just before the current PHI node.
- PHINode * scalarPHI = cast<PHINode>(
- InsertNewInstWith(PHINode::Create(EI.getType(),
- PN->getNumIncomingValues(), ""), *PN));
+ PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
+ PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
// Scalarize each PHI operand.
- for (unsigned i=0; i < PN->getNumIncomingValues(); i++) {
+ for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
Value *PHIInVal = PN->getIncomingValue(i);
BasicBlock *inBB = PN->getIncomingBlock(i);
Value *Elt = EI.getIndexOperand();
@@ -145,17 +143,17 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// scalar PHI and the second operand is extracted from the other
// vector operand.
BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
- unsigned opId = (B0->getOperand(0) == PN) ? 1: 0;
- Value *Op = Builder->CreateExtractElement(
- B0->getOperand(opId), Elt, B0->getOperand(opId)->getName()+".Elt");
+ unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
+ Value *Op = InsertNewInstWith(
+ ExtractElementInst::Create(B0->getOperand(opId), Elt,
+ B0->getOperand(opId)->getName() + ".Elt"),
+ *B0);
Value *newPHIUser = InsertNewInstWith(
- BinaryOperator::Create(B0->getOpcode(), scalarPHI,Op),
- *B0);
+ BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0);
scalarPHI->addIncoming(newPHIUser, inBB);
} else {
// Scalarize PHI input:
- Instruction *newEI =
- ExtractElementInst::Create(PHIInVal, Elt, "");
+ Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
// Insert the new instruction into the predecessor basic block.
Instruction *pos = dyn_cast<Instruction>(PHIInVal);
BasicBlock::iterator InsertPos;
@@ -224,7 +222,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
Instruction *scalarPHI = scalarizePHI(EI, PN);
if (scalarPHI)
- return (scalarPHI);
+ return scalarPHI;
}
}
@@ -284,6 +282,38 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
Worklist.AddValue(EE);
return CastInst::Create(CI->getOpcode(), EE, EI.getType());
}
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ if (SI->hasOneUse()) {
+ // TODO: For a select on vectors, it might be useful to do this if it
+ // has multiple extractelement uses. For vector select, that seems to
+ // fight the vectorizer.
+
+ // If we are extracting an element from a vector select or a select on
+ // vectors, create a select on the scalars extracted from the vector
+ // arguments.
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+
+ Value *Cond = SI->getCondition();
+ if (Cond->getType()->isVectorTy()) {
+ Cond = Builder->CreateExtractElement(Cond,
+ EI.getIndexOperand(),
+ Cond->getName() + ".elt");
+ }
+
+ Value *V1Elem
+ = Builder->CreateExtractElement(TrueVal,
+ EI.getIndexOperand(),
+ TrueVal->getName() + ".elt");
+
+ Value *V2Elem
+ = Builder->CreateExtractElement(FalseVal,
+ EI.getIndexOperand(),
+ FalseVal->getName() + ".elt");
+ return SelectInst::Create(Cond,
+ V1Elem,
+ V2Elem,
+ SI->getName() + ".elt");
+ }
}
}
return 0;
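The new SelectInst case folds an extract of a select into a select of the
extracted scalars. A standalone numeric check of the lane-wise equivalence
(illustrative only, not from the patch):

    #include <cassert>

    int main() {
      bool Cond[4] = {true, false, true, false};  // vector select condition
      int T[4] = {1, 2, 3, 4}, F[4] = {5, 6, 7, 8};
      int Sel[4];
      for (int l = 0; l < 4; ++l)
        Sel[l] = Cond[l] ? T[l] : F[l];           // whole-vector select
      int Extracted = Sel[2];                     // extractelement at lane 2
      int Folded = Cond[2] ? T[2] : F[2];         // select on extracted scalars
      assert(Extracted == Folded);
      return 0;
    }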
@@ -296,7 +326,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
SmallVectorImpl<Constant*> &Mask) {
assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
- unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+ unsigned NumElts = V->getType()->getVectorNumElements();
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
@@ -496,6 +526,254 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
return 0;
}
+/// Return true if we can evaluate the specified expression tree if the vector
+/// elements were shuffled in a different order.
+static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
+ unsigned Depth = 5) {
+ // We can always reorder the elements of a constant.
+ if (isa<Constant>(V))
+ return true;
+
+ // We won't reorder vector arguments. No IPO here.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Two users may expect different orders of the elements. Don't try it.
+ if (!I->hasOneUse())
+ return false;
+
+ if (Depth == 0) return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::GetElementPtr: {
+ for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1))
+ return false;
+ }
+ return true;
+ }
+ case Instruction::InsertElement: {
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
+ if (!CI) return false;
+ int ElementNumber = CI->getLimitedValue();
+
+ // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
+ // can't put an element into multiple indices.
+ bool SeenOnce = false;
+ for (int i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] == ElementNumber) {
+ if (SeenOnce)
+ return false;
+ SeenOnce = true;
+ }
+ }
+ return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1);
+ }
+ }
+ return false;
+}
+
+/// Rebuild a new instruction just like 'I' but with the new operands given.
+/// In the event of a type mismatch, the types of the new operands are taken
+/// to be correct; result types are recomputed from them where necessary.
+static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
+ // We don't want to use the IRBuilder here because we want the replacement
+ // instructions to appear next to 'I', not the builder's insertion point.
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ BinaryOperator *BO = cast<BinaryOperator>(I);
+ assert(NewOps.size() == 2 && "binary operator with #ops != 2");
+ BinaryOperator *New =
+ BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
+ NewOps[0], NewOps[1], "", BO);
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
+ New->setHasNoSignedWrap(BO->hasNoSignedWrap());
+ }
+ if (isa<PossiblyExactOperator>(BO)) {
+ New->setIsExact(BO->isExact());
+ }
+ return New;
+ }
+ case Instruction::ICmp:
+ assert(NewOps.size() == 2 && "icmp with #ops != 2");
+ return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
+ NewOps[0], NewOps[1]);
+ case Instruction::FCmp:
+ assert(NewOps.size() == 2 && "fcmp with #ops != 2");
+ return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
+ NewOps[0], NewOps[1]);
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt: {
+ // It's possible that the mask has a different number of elements from
+ // the original cast. We recompute the destination type to match the mask.
+ Type *DestTy =
+ VectorType::get(I->getType()->getScalarType(),
+ NewOps[0]->getType()->getVectorNumElements());
+ assert(NewOps.size() == 1 && "cast with #ops != 1");
+ return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
+ "", I);
+ }
+ case Instruction::GetElementPtr: {
+ Value *Ptr = NewOps[0];
+ ArrayRef<Value*> Idx = NewOps.slice(1);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
+ GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
+ return GEP;
+ }
+ }
+ llvm_unreachable("failed to rebuild vector instructions");
+}
+
+Value *
+InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
+ // Mask.size() does not need to be equal to the number of vector elements.
+
+ assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
+ if (isa<UndefValue>(V)) {
+ return UndefValue::get(VectorType::get(V->getType()->getScalarType(),
+ Mask.size()));
+ }
+ if (isa<ConstantAggregateZero>(V)) {
+ return ConstantAggregateZero::get(
+ VectorType::get(V->getType()->getScalarType(),
+ Mask.size()));
+ }
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ SmallVector<Constant *, 16> MaskValues;
+ for (int i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] == -1)
+ MaskValues.push_back(UndefValue::get(Builder->getInt32Ty()));
+ else
+ MaskValues.push_back(Builder->getInt32(Mask[i]));
+ }
+ return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
+ ConstantVector::get(MaskValues));
+ }
+
+ Instruction *I = cast<Instruction>(V);
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::Select:
+ case Instruction::GetElementPtr: {
+ SmallVector<Value*, 8> NewOps;
+ bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
+ for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask);
+ NewOps.push_back(V);
+ NeedsRebuild |= (V != I->getOperand(i));
+ }
+ if (NeedsRebuild) {
+ return BuildNew(I, NewOps);
+ }
+ return I;
+ }
+ case Instruction::InsertElement: {
+ int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
+
+ // The insertelement was inserting at Element. Figure out which element
+ // that becomes after shuffling. The answer is guaranteed to be unique
+ // by CanEvaluateShuffled.
+ bool Found = false;
+ int Index = 0;
+ for (int e = Mask.size(); Index != e; ++Index) {
+ if (Mask[Index] == Element) {
+ Found = true;
+ break;
+ }
+ }
+
+ if (!Found)
+ return UndefValue::get(
+ VectorType::get(V->getType()->getScalarType(), Mask.size()));
+
+ Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
+ return InsertElementInst::Create(V, I->getOperand(1),
+ Builder->getInt32(Index), "", I);
+ }
+ }
+ llvm_unreachable("failed to reorder elements of vector instruction!");
+}
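CanEvaluateShuffled and EvaluateInDifferentElementOrder rest on the fact that
lane-wise operations commute with a reordering shuffle: shuffling the result
of an add equals adding the shuffled operands. A quick check of that property
(illustrative C++, not part of the patch):

    #include <cassert>

    int main() {
      int A[4] = {1, 2, 3, 4}, B[4] = {10, 20, 30, 40};
      int Mask[4] = {3, 0, 2, 1};               // a pure reordering mask
      int Sum[4], Shuffled[4];
      for (int l = 0; l < 4; ++l)
        Sum[l] = A[l] + B[l];                   // add, then shuffle the result
      for (int l = 0; l < 4; ++l)
        Shuffled[l] = Sum[Mask[l]];
      for (int l = 0; l < 4; ++l)               // equals shuffling, then adding
        assert(Shuffled[l] == A[Mask[l]] + B[Mask[l]]);
      return 0;
    }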
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
@@ -527,9 +805,9 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (LHS == RHS || isa<UndefValue>(LHS)) {
if (isa<UndefValue>(LHS) && LHS == RHS) {
// shuffle(undef,undef,mask) -> undef.
- Value* result = (VWidth == LHSWidth)
+ Value *Result = (VWidth == LHSWidth)
? LHS : UndefValue::get(SVI.getType());
- return ReplaceInstUsesWith(SVI, result);
+ return ReplaceInstUsesWith(SVI, Result);
}
// Remap any references to RHS to use LHS.
@@ -576,6 +854,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
}
+ if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
+ Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
+ return ReplaceInstUsesWith(SVI, V);
+ }
+
// If the LHS is a shufflevector itself, see if we can combine it with this
// one without producing an unusual shuffle.
// Cases that might be simplified:
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
index 49efce5..f84db27 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -1,4 +1,4 @@
-//===- InstCombineWorklist.h - Worklist for the InstCombine pass ----------===//
+//===- InstCombineWorklist.h - Worklist for InstCombine pass ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -37,7 +37,7 @@ public:
/// in it.
void Add(Instruction *I) {
if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
- DEBUG(errs() << "IC: ADD: " << *I << '\n');
+ DEBUG(dbgs() << "IC: ADD: " << *I << '\n');
Worklist.push_back(I);
}
}
@@ -54,7 +54,7 @@ public:
assert(Worklist.empty() && "Worklist must be empty to add initial group");
Worklist.reserve(NumEntries+16);
WorklistMap.resize(NumEntries);
- DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+ DEBUG(dbgs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
for (unsigned Idx = 0; NumEntries; --NumEntries) {
Instruction *I = List[NumEntries-1];
WorklistMap.insert(std::make_pair(I, Idx++));
@@ -74,8 +74,7 @@ public:
}
Instruction *RemoveOne() {
- Instruction *I = Worklist.back();
- Worklist.pop_back();
+ Instruction *I = Worklist.pop_back_val();
WorklistMap.erase(I);
return I;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index ec10751..191a101 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -699,7 +699,10 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
Value *InV = 0;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+      // Beware of ConstantExpr: it may eventually evaluate to a null value,
+      // even though isNullValue() currently returns false.
+ Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
+ if (InC && !isa<ConstantExpr>(InC))
InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
else
InV = Builder->CreateSelect(PN->getIncomingValue(i),
@@ -755,19 +758,25 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
return ReplaceInstUsesWith(I, NewPN);
}
-/// FindElementAtOffset - Given a type and a constant offset, determine whether
-/// or not there is a sequence of GEP indices into the type that will land us at
-/// the specified offset. If so, fill them into NewIndices and return the
-/// resultant element type, otherwise return null.
-Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices) {
- if (!TD) return 0;
- if (!Ty->isSized()) return 0;
+/// FindElementAtOffset - Given a pointer type and a constant offset, determine
+/// whether or not there is a sequence of GEP indices into the pointee type that
+/// will land us at the specified offset. If so, fill them into NewIndices and
+/// return the resultant element type, otherwise return null.
+Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices) {
+ assert(PtrTy->isPtrOrPtrVectorTy());
+
+ if (!TD)
+ return 0;
+
+ Type *Ty = PtrTy->getPointerElementType();
+ if (!Ty->isSized())
+ return 0;
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
- Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(PtrTy);
int64_t FirstIdx = 0;
if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
@@ -1176,6 +1185,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
}
+ // Canonicalize (gep i8* X, -(ptrtoint Y)) to (sub (ptrtoint X), (ptrtoint Y))
+ // The GEP pattern is emitted by the SCEV expander for certain kinds of
+ // pointer arithmetic.
+ if (TD && GEP.getNumIndices() == 1 &&
+ match(GEP.getOperand(1), m_Neg(m_PtrToInt(m_Value())))) {
+ unsigned AS = GEP.getPointerAddressSpace();
+ if (GEP.getType() == Builder->getInt8PtrTy(AS) &&
+ GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
+ TD->getPointerSizeInBits(AS)) {
+ Operator *Index = cast<Operator>(GEP.getOperand(1));
+ Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType());
+ Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1));
+ return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType());
+ }
+ }
+
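The GEP pattern and its canonical integer form compute the same address modulo
pointer width. A small check with all arithmetic done in uintptr_t so it stays
well defined (illustrative; assumes a flat address space):

    #include <cassert>
    #include <cstdint>

    int main() {
      char Buf[16];
      char *X = Buf + 12, *Y = Buf + 4;
      uintptr_t NegY = 0u - (uintptr_t)Y;               // -(ptrtoint Y), modular
      uintptr_t GepForm = (uintptr_t)X + NegY;          // gep i8* X, NegY
      uintptr_t SubForm = (uintptr_t)X - (uintptr_t)Y;  // sub(ptrtoint X, ptrtoint Y)
      assert(GepForm == SubForm);
      return 0;
    }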
// Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
Value *StrippedPtr = PtrOp->stripPointerCasts();
PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType());
@@ -1231,13 +1256,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
Type *SrcElTy = StrippedPtrTy->getElementType();
- Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
+ Type *ResElTy = PtrOp->getType()->getPointerElementType();
if (TD && SrcElTy->isArrayTy() &&
- TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+ TD->getTypeAllocSize(SrcElTy->getArrayElementType()) ==
TD->getTypeAllocSize(ResElTy)) {
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
- Idx[1] = GEP.getOperand(1);
+ Type *IdxType = TD->getIntPtrType(GEP.getType());
+ Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
Value *NewGEP = GEP.isInBounds() ?
Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
@@ -1261,7 +1285,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1287,8 +1311,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Check that changing to the array element type amounts to dividing the
// index by a scale factor.
uint64_t ResSize = TD->getTypeAllocSize(ResElTy);
- uint64_t ArrayEltSize =
- TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+ uint64_t ArrayEltSize
+ = TD->getTypeAllocSize(SrcElTy->getArrayElementType());
if (ResSize && ArrayEltSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1296,7 +1320,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1304,9 +1328,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
- Value *Off[2];
- Off[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
- Off[1] = NewIdx;
+ Value *Off[2] = {
+ Constant::getNullValue(TD->getIntPtrType(GEP.getType())),
+ NewIdx
+ };
+
Value *NewGEP = GEP.isInBounds() && NSW ?
Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
@@ -1318,15 +1344,20 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
+ if (!TD)
+ return 0;
+
/// See if we can simplify:
/// X = bitcast A* to B*
/// Y = gep X, <...constant indices...>
/// into a gep of the original struct. This is important for SROA and alias
/// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
- APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
- if (TD &&
- !isa<BitCastInst>(BCI->getOperand(0)) &&
+ Value *Operand = BCI->getOperand(0);
+ PointerType *OpType = cast<PointerType>(Operand->getType());
+ unsigned OffsetBits = TD->getPointerTypeSizeInBits(OpType);
+ APInt Offset(OffsetBits, 0);
+ if (!isa<BitCastInst>(Operand) &&
GEP.accumulateConstantOffset(*TD, Offset) &&
StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
@@ -1335,8 +1366,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Offset) {
// If the bitcast is of an allocation, and the allocation will be
// converted to match the type of the cast, don't touch this.
- if (isa<AllocaInst>(BCI->getOperand(0)) ||
- isAllocationFn(BCI->getOperand(0), TLI)) {
+ if (isa<AllocaInst>(Operand) || isAllocationFn(Operand, TLI)) {
// See if the bitcast simplifies, if so, don't nuke this GEP yet.
if (Instruction *I = visitBitCast(*BCI)) {
if (I != BCI) {
@@ -1347,19 +1377,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return &GEP;
}
}
- return new BitCastInst(BCI->getOperand(0), GEP.getType());
+ return new BitCastInst(Operand, GEP.getType());
}
// Otherwise, if the offset is non-zero, we need to find out if there is a
// field at Offset in 'A's type. If so, we can pull the cast through the
// GEP.
SmallVector<Value*, 8> NewIndices;
- Type *InTy =
- cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
- if (FindElementAtOffset(InTy, Offset.getSExtValue(), NewIndices)) {
+ if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) {
Value *NGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) :
- Builder->CreateGEP(BCI->getOperand(0), NewIndices);
+ Builder->CreateInBoundsGEP(Operand, NewIndices) :
+ Builder->CreateGEP(Operand, NewIndices);
if (NGEP->getType() == GEP.getType())
return ReplaceInstUsesWith(GEP, NGEP);
@@ -1372,8 +1400,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return 0;
}
-
-
static bool
isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
const TargetLibraryInfo *TLI) {
@@ -2042,7 +2068,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
continue;
// If Filter is a subset of LFilter, i.e. every element of Filter is also
// an element of LFilter, then discard LFilter.
- SmallVector<Value *, 16>::iterator J = NewClauses.begin() + j;
+ SmallVectorImpl<Value *>::iterator J = NewClauses.begin() + j;
// If Filter is empty then it is a subset of LFilter.
if (!FElts) {
// Discard LFilter.
@@ -2209,7 +2235,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// DCE instruction if trivially dead.
if (isInstructionTriviallyDead(Inst, TLI)) {
++NumDeadInst;
- DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
+ DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
Inst->eraseFromParent();
continue;
}
@@ -2217,7 +2243,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) {
- DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+ DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
++NumConstProp;
@@ -2293,7 +2319,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
MadeIRChange = false;
- DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
<< F.getName() << "\n");
{
@@ -2338,7 +2364,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Check to see if we can DCE the instruction.
if (isInstructionTriviallyDead(I, TLI)) {
- DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ DEBUG(dbgs() << "IC: DCE: " << *I << '\n');
EraseInstFromFunction(*I);
++NumDeadInst;
MadeIRChange = true;
@@ -2348,7 +2374,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
- DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+ DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
ReplaceInstUsesWith(*I, C);
@@ -2396,13 +2422,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
std::string OrigI;
#endif
DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
- DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+ DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
if (Instruction *Result = visit(*I)) {
++NumCombined;
// Should we replace the old instruction with a new one?
if (Result != I) {
- DEBUG(errs() << "IC: Old = " << *I << '\n'
+ DEBUG(dbgs() << "IC: Old = " << *I << '\n'
<< " New = " << *Result << '\n');
if (!I->getDebugLoc().isUnknown())
@@ -2431,7 +2457,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
EraseInstFromFunction(*I);
} else {
#ifndef NDEBUG
- DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
+ DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
<< " New = " << *I << '\n');
#endif
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 623c470..d731ec5 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/DIBuilder.h"
@@ -39,13 +40,14 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/BlackList.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/SpecialCaseList.h"
#include <algorithm>
#include <string>
@@ -56,36 +58,49 @@ static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
static const uint64_t kDefaultShort64bitShadowOffset = 0x7FFF8000; // < 2G.
static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
+static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa8000;
+static const size_t kMinStackMallocSize = 1 << 6; // 64B
static const size_t kMaxStackMallocSize = 1 << 16; // 64K
static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
-static const char *kAsanModuleCtorName = "asan.module_ctor";
-static const char *kAsanModuleDtorName = "asan.module_dtor";
-static const int kAsanCtorAndCtorPriority = 1;
-static const char *kAsanReportErrorTemplate = "__asan_report_";
-static const char *kAsanReportLoadN = "__asan_report_load_n";
-static const char *kAsanReportStoreN = "__asan_report_store_n";
-static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
-static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
-static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
-static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
-static const char *kAsanInitName = "__asan_init_v3";
-static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
-static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
-static const char *kAsanMappingScaleName = "__asan_mapping_scale";
-static const char *kAsanStackMallocName = "__asan_stack_malloc";
-static const char *kAsanStackFreeName = "__asan_stack_free";
-static const char *kAsanGenPrefix = "__asan_gen_";
-static const char *kAsanPoisonStackMemoryName = "__asan_poison_stack_memory";
-static const char *kAsanUnpoisonStackMemoryName =
+static const char *const kAsanModuleCtorName = "asan.module_ctor";
+static const char *const kAsanModuleDtorName = "asan.module_dtor";
+static const int kAsanCtorAndCtorPriority = 1;
+static const char *const kAsanReportErrorTemplate = "__asan_report_";
+static const char *const kAsanReportLoadN = "__asan_report_load_n";
+static const char *const kAsanReportStoreN = "__asan_report_store_n";
+static const char *const kAsanRegisterGlobalsName = "__asan_register_globals";
+static const char *const kAsanUnregisterGlobalsName =
+ "__asan_unregister_globals";
+static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
+static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
+static const char *const kAsanInitName = "__asan_init_v3";
+static const char *const kAsanCovName = "__sanitizer_cov";
+static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
+static const char *const kAsanMappingOffsetName = "__asan_mapping_offset";
+static const char *const kAsanMappingScaleName = "__asan_mapping_scale";
+static const int kMaxAsanStackMallocSizeClass = 10;
+static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_";
+static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_";
+static const char *const kAsanGenPrefix = "__asan_gen_";
+static const char *const kAsanPoisonStackMemoryName =
+ "__asan_poison_stack_memory";
+static const char *const kAsanUnpoisonStackMemoryName =
"__asan_unpoison_stack_memory";
+static const char *const kAsanOptionDetectUAR =
+ "__asan_option_detect_stack_use_after_return";
+
+// These constants must match the definitions in the run-time library.
static const int kAsanStackLeftRedzoneMagic = 0xf1;
static const int kAsanStackMidRedzoneMagic = 0xf2;
static const int kAsanStackRightRedzoneMagic = 0xf3;
static const int kAsanStackPartialRedzoneMagic = 0xf4;
+#ifndef NDEBUG
+static const int kAsanStackAfterReturnMagic = 0xf5;
+#endif
 // Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
@@ -120,6 +135,8 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
// This flag may need to be replaced with -f[no]asan-globals.
static cl::opt<bool> ClGlobals("asan-globals",
cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClCoverage("asan-coverage",
+ cl::desc("ASan coverage"), cl::Hidden, cl::init(false));
static cl::opt<bool> ClInitializers("asan-initialization-order",
cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
static cl::opt<bool> ClMemIntrin("asan-memintrin",
@@ -130,6 +147,19 @@ static cl::opt<std::string> ClBlacklistFile("asan-blacklist",
cl::desc("File containing the list of objects to ignore "
"during instrumentation"), cl::Hidden);
+// This is an experimental feature that will allow choosing between
+// instrumented and non-instrumented code at link time.
+// If this option is on, just before instrumenting a function we create its
+// clone; if the function is not changed by ASan, the clone is deleted.
+// If we end up with a clone, we put the instrumented function into a section
+// called "ASAN" and the uninstrumented function into a section called "NOASAN".
+//
+// This is still a prototype; we need to figure out a way to keep two copies
+// of a function so that the linker can easily choose one of them.
+static cl::opt<bool> ClKeepUninstrumented("asan-keep-uninstrumented-functions",
+ cl::desc("Keep uninstrumented copies of functions"),
+ cl::Hidden, cl::init(false));
+
// These flags allow to change the shadow mapping.
// The shadow mapping looks like
// Shadow = (Mem >> scale) + (1 << offset_log)
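Plugging in the default 64-bit parameters from this file (scale 3, offset
kDefaultShadowOffset64 = 1 << 44), a worked example of the mapping:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t Scale = 3;                  // shadow granule = 1 << 3 bytes
      const uint64_t Offset = 1ULL << 44;        // kDefaultShadowOffset64
      uint64_t Mem = 0x7f0000001234ULL;
      uint64_t Shadow = (Mem >> Scale) + Offset; // Shadow = (Mem >> scale) + offset
      std::printf("shadow(0x%llx) = 0x%llx\n",
                  (unsigned long long)Mem, (unsigned long long)Shadow);
      return 0;
    }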
@@ -167,6 +197,13 @@ static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
 static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"),
cl::Hidden, cl::init(-1));
+STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
+STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
+STATISTIC(NumOptimizedAccessesToGlobalArray,
+ "Number of optimized accesses to global arrays");
+STATISTIC(NumOptimizedAccessesToGlobalVar,
+ "Number of optimized accesses to global vars");
+
namespace {
/// A set of dynamically initialized globals extracted from metadata.
class SetOfDynamicallyInitializedGlobals {
@@ -206,8 +243,11 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize,
llvm::Triple TargetTriple(M.getTargetTriple());
bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
bool IsMacOSX = TargetTriple.getOS() == llvm::Triple::MacOSX;
- bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64;
+ bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64 ||
+ TargetTriple.getArch() == llvm::Triple::ppc64le;
bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64;
+ bool IsMIPS32 = TargetTriple.getArch() == llvm::Triple::mips ||
+ TargetTriple.getArch() == llvm::Triple::mipsel;
ShadowMapping Mapping;
@@ -217,7 +257,8 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize,
Mapping.OrShadowOffset = !IsPPC64 && !ClShort64BitOffset;
Mapping.Offset = (IsAndroid || ZeroBaseShadow) ? 0 :
- (LongSize == 32 ? kDefaultShadowOffset32 :
+ (LongSize == 32 ?
+ (IsMIPS32 ? kMIPS32_ShadowOffset32 : kDefaultShadowOffset32) :
IsPPC64 ? kPPC64_ShadowOffset64 : kDefaultShadowOffset64);
if (!ZeroBaseShadow && ClShort64BitOffset && IsX86_64 && !IsMacOSX) {
assert(LongSize == 64);
@@ -285,6 +326,8 @@ struct AddressSanitizer : public FunctionPass {
bool ShouldInstrumentGlobal(GlobalVariable *G);
bool LooksLikeCodeInBug11395(Instruction *I);
void FindDynamicInitializers(Module &M);
+ bool GlobalIsLinkerInitialized(GlobalVariable *G);
+ bool InjectCoverage(Function &F);
bool CheckInitOrder;
bool CheckUseAfterReturn;
@@ -300,7 +343,8 @@ struct AddressSanitizer : public FunctionPass {
Function *AsanCtorFunction;
Function *AsanInitFunction;
Function *AsanHandleNoReturnFunc;
- OwningPtr<BlackList> BL;
+ Function *AsanCovFunction;
+ OwningPtr<SpecialCaseList> BL;
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
// This array is indexed by AccessIsWrite.
@@ -340,7 +384,7 @@ class AddressSanitizerModule : public ModulePass {
SmallString<64> BlacklistFile;
bool ZeroBaseShadow;
- OwningPtr<BlackList> BL;
+ OwningPtr<SpecialCaseList> BL;
SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
Type *IntptrTy;
LLVMContext *C;
@@ -375,12 +419,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
uint64_t TotalStackSize;
unsigned StackAlignment;
- Function *AsanStackMallocFunc, *AsanStackFreeFunc;
+ Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
+ *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc;
// Stores a place and arguments of poisoning/unpoisoning call for alloca.
struct AllocaPoisonCall {
IntrinsicInst *InsBefore;
+ AllocaInst *AI;
uint64_t Size;
bool DoPoison;
};
@@ -433,7 +479,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
StackAlignment = std::max(StackAlignment, AI.getAlignment());
AllocaVec.push_back(&AI);
- uint64_t AlignedSize = getAlignedAllocaSize(&AI);
+ uint64_t AlignedSize = getAlignedAllocaSize(&AI);
TotalStackSize += AlignedSize;
}
@@ -459,7 +505,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
AllocaInst *AI = findAllocaForValue(II.getArgOperand(1));
if (!AI) return;
bool DoPoison = (ID == Intrinsic::lifetime_end);
- AllocaPoisonCall APC = {&II, SizeValue, DoPoison};
+ AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison};
AllocaPoisonCallVec.push_back(APC);
}
@@ -467,33 +513,37 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
void initializeCallbacks(Module &M);
// Check if we want (and can) handle this alloca.
- bool isInterestingAlloca(AllocaInst &AI) {
+ bool isInterestingAlloca(AllocaInst &AI) const {
return (!AI.isArrayAllocation() &&
AI.isStaticAlloca() &&
+ AI.getAlignment() <= RedzoneSize() &&
AI.getAllocatedType()->isSized());
}
size_t RedzoneSize() const {
return RedzoneSizeForScale(Mapping.Scale);
}
- uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
+ uint64_t getAllocaSizeInBytes(AllocaInst *AI) const {
Type *Ty = AI->getAllocatedType();
uint64_t SizeInBytes = ASan.TD->getTypeAllocSize(Ty);
return SizeInBytes;
}
- uint64_t getAlignedSize(uint64_t SizeInBytes) {
+ uint64_t getAlignedSize(uint64_t SizeInBytes) const {
size_t RZ = RedzoneSize();
return ((SizeInBytes + RZ - 1) / RZ) * RZ;
}
- uint64_t getAlignedAllocaSize(AllocaInst *AI) {
+ uint64_t getAlignedAllocaSize(AllocaInst *AI) const {
uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
return getAlignedSize(SizeInBytes);
}
/// Finds alloca where the value comes from.
AllocaInst *findAllocaForValue(Value *V);
- void poisonRedZones(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
+ void poisonRedZones(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> &IRB,
Value *ShadowBase, bool DoPoison);
- void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> IRB, bool DoPoison);
+ void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> &IRB, bool DoPoison);
+
+ void SetShadowToStackAfterReturnInlined(IRBuilder<> &IRB, Value *ShadowBase,
+ int Size);
};
} // namespace
@@ -520,16 +570,16 @@ ModulePass *llvm::createAddressSanitizerModulePass(
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
- size_t Res = CountTrailingZeros_32(TypeSize / 8);
+ size_t Res = countTrailingZeros(TypeSize / 8);
assert(Res < kNumberOfAccessSizes);
return Res;
}
-// Create a constant for Str so that we can pass it to the run-time lib.
+// \brief Create a constant for Str so that we can pass it to the run-time lib.
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true,
- GlobalValue::PrivateLinkage, StrConst,
+ GlobalValue::InternalLinkage, StrConst,
kAsanGenPrefix);
GV->setUnnamedAddr(true); // Ok to merge these.
GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
@@ -620,6 +670,13 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
return NULL;
}
+bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
+  // If a global variable does not have dynamic initialization we don't
+  // have to instrument it. However, if a global has no initializer at all,
+  // we assume it has a dynamic initializer (in another TU).
+ return G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G);
+}
+
void AddressSanitizer::instrumentMop(Instruction *I) {
bool IsWrite = false;
Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
@@ -628,13 +685,19 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
// If initialization order checking is disabled, a simple access to a
// dynamically initialized global is always valid.
- if (!CheckInitOrder)
- return;
- // If a global variable does not have dynamic initialization we don't
- // have to instrument it. However, if a global does not have initailizer
- // at all, we assume it has dynamic initializer (in other TU).
- if (G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G))
+ if (!CheckInitOrder || GlobalIsLinkerInitialized(G)) {
+ NumOptimizedAccessesToGlobalVar++;
return;
+ }
+ }
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr);
+ if (CE && CE->isGEPWithNoNotionalOverIndexing()) {
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
+ if (CE->getOperand(1)->isNullValue() && GlobalIsLinkerInitialized(G)) {
+ NumOptimizedAccessesToGlobalArray++;
+ return;
+ }
+ }
}
}
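In source terms, the accesses this change stops instrumenting look like the
following sketch (hypothetical C++, not from the patch): a constant-index read
of a global with a static initializer needs no shadow check, since both the
address and the bounds are known before the program runs.

    static const int Table[4] = {1, 2, 3, 4};  // linker-initialized global

    int lookup() {
      // Lowers to a load through a ConstantExpr GEP with in-range constant
      // indices: provably in bounds, so the shadow check can be skipped.
      return Table[2];
    }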
@@ -646,6 +709,11 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
assert((TypeSize % 8) == 0);
+ if (IsWrite)
+ NumInstrumentedWrites++;
+ else
+ NumInstrumentedReads++;
+
// Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check.
if (TypeSize == 8 || TypeSize == 16 ||
TypeSize == 32 || TypeSize == 64 || TypeSize == 128)
@@ -861,7 +929,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
TD = getAnalysisIfAvailable<DataLayout>();
if (!TD)
return false;
- BL.reset(new BlackList(BlacklistFile));
+ BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
if (BL->isIn(M)) return false;
C = &(M.getContext());
int LongSize = TD->getPointerSizeInBits();
@@ -892,8 +960,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
IntptrTy, IntptrTy,
IntptrTy, IntptrTy, NULL);
- SmallVector<Constant *, 16> Initializers(n), DynamicInit;
-
+ SmallVector<Constant *, 16> Initializers(n);
Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
assert(CtorFunc);
@@ -929,7 +996,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
bool GlobalHasDynamicInitializer =
DynamicallyInitializedGlobals.Contains(G);
// Don't check initialization order if this global is blacklisted.
- GlobalHasDynamicInitializer &= !BL->isInInit(*G);
+ GlobalHasDynamicInitializer &= !BL->isIn(*G, "init");
StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
Constant *NewInitializer = ConstantStruct::get(
@@ -939,8 +1006,11 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
GlobalVariable *Name = createPrivateGlobalForString(M, G->getName());
// Create a new global variable with enough space for a redzone.
+ GlobalValue::LinkageTypes Linkage = G->getLinkage();
+ if (G->isConstant() && Linkage == GlobalValue::PrivateLinkage)
+ Linkage = GlobalValue::InternalLinkage;
GlobalVariable *NewGlobal = new GlobalVariable(
- M, NewTy, G->isConstant(), G->getLinkage(),
+ M, NewTy, G->isConstant(), Linkage,
NewInitializer, "", G, G->getThreadLocalMode());
NewGlobal->copyAttributesFrom(G);
NewGlobal->setAlignment(MinRZ);
@@ -973,7 +1043,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n);
GlobalVariable *AllGlobals = new GlobalVariable(
- M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage,
+ M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage,
ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
// Create calls for poisoning before initializers run and unpoisoning after.
@@ -1021,6 +1091,8 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
+ AsanCovFunction = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanCovName, IRB.getVoidTy(), IntptrTy, NULL));
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
@@ -1051,7 +1123,7 @@ bool AddressSanitizer::doInitialization(Module &M) {
if (!TD)
return false;
- BL.reset(new BlackList(BlacklistFile));
+ BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
DynamicallyInitializedGlobals.Init(M);
C = &(M.getContext());
@@ -1092,6 +1164,47 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
return false;
}
+// Poor man's coverage that works with ASan.
+// We create a Guard boolean variable with the same linkage
+// as the function and inject this code into the entry block:
+// if (!*Guard) {
+// __sanitizer_cov(&F);
+// *Guard = 1;
+// }
+// The accesses to Guard are atomic. The rest of the logic is
+// in __sanitizer_cov (it's fine to call it more than once).
+//
+// This coverage implementation provides very limited data:
+// it only tells if a given function was ever executed.
+// No counters, no per-basic-block or per-edge data.
+// But for many use cases this is what we need and the added slowdown
+// is negligible. This simple implementation will probably be obsoleted
+// by the upcoming Clang-based coverage implementation.
+// By having it here and now we hope to
+// a) get the functionality to users earlier and
+// b) collect usage statistics to help improve Clang coverage design.
+bool AddressSanitizer::InjectCoverage(Function &F) {
+ if (!ClCoverage) return false;
+ IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt());
+ Type *Int8Ty = IRB.getInt8Ty();
+ GlobalVariable *Guard = new GlobalVariable(
+ *F.getParent(), Int8Ty, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(Int8Ty), "__asan_gen_cov_" + F.getName());
+ LoadInst *Load = IRB.CreateLoad(Guard);
+ Load->setAtomic(Monotonic);
+ Load->setAlignment(1);
+ Value *Cmp = IRB.CreateICmpEQ(Constant::getNullValue(Int8Ty), Load);
+ Instruction *Ins = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+ IRB.SetInsertPoint(Ins);
+ // We pass &F to __sanitizer_cov. We could avoid this and rely on
+ // GET_CALLER_PC, but having the PC of the first instruction is just nice.
+ IRB.CreateCall(AsanCovFunction, IRB.CreatePointerCast(&F, IntptrTy));
+ StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int8Ty, 1), Guard);
+ Store->setAtomic(Monotonic);
+ Store->setAlignment(1);
+ return true;
+}
+
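A C++ model of the injected fast path (illustrative; LLVM's Monotonic ordering
corresponds to std::memory_order_relaxed, and __sanitizer_cov is supplied by
the ASan runtime):

    #include <atomic>

    extern "C" void __sanitizer_cov(void *pc);    // runtime callback

    static std::atomic<unsigned char> Guard{0};   // one guard per function

    void instrumented_function() {
      if (Guard.load(std::memory_order_relaxed) == 0) {
        __sanitizer_cov((void *)&instrumented_function);
        Guard.store(1, std::memory_order_relaxed);
      }
      // ... original function body ...
    }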
bool AddressSanitizer::runOnFunction(Function &F) {
if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
@@ -1102,8 +1215,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
// If needed, insert __asan_init before checking for SanitizeAddress attr.
maybeInsertAsanInitAtFunctionEntry(F);
- if (!F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::SanitizeAddress))
+ if (!F.hasFnAttribute(Attribute::SanitizeAddress))
return false;
if (!ClDebugFunc.empty() && ClDebugFunc != F.getName())
@@ -1114,6 +1226,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
SmallSet<Value*, 16> TempsToInstrument;
SmallVector<Instruction*, 16> ToInstrument;
SmallVector<Instruction*, 8> NoReturnCalls;
+ int NumAllocas = 0;
bool IsWrite;
// Fill the set of memory operations to instrument.
@@ -1132,6 +1245,8 @@ bool AddressSanitizer::runOnFunction(Function &F) {
} else if (isa<MemIntrinsic>(BI) && ClMemIntrin) {
// ok, take it.
} else {
+ if (isa<AllocaInst>(BI))
+ NumAllocas++;
CallSite CS(BI);
if (CS) {
// A call inside BB.
@@ -1148,6 +1263,17 @@ bool AddressSanitizer::runOnFunction(Function &F) {
}
}
+ Function *UninstrumentedDuplicate = 0;
+ bool LikelyToInstrument =
+ !NoReturnCalls.empty() || !ToInstrument.empty() || (NumAllocas > 0);
+ if (ClKeepUninstrumented && LikelyToInstrument) {
+ ValueToValueMapTy VMap;
+ UninstrumentedDuplicate = CloneFunction(&F, VMap, false);
+ UninstrumentedDuplicate->removeFnAttr(Attribute::SanitizeAddress);
+ UninstrumentedDuplicate->setName("NOASAN_" + F.getName());
+ F.getParent()->getFunctionList().push_back(UninstrumentedDuplicate);
+ }
+
// Instrument.
int NumInstrumented = 0;
for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {
@@ -1172,9 +1298,29 @@ bool AddressSanitizer::runOnFunction(Function &F) {
IRBuilder<> IRB(CI);
IRB.CreateCall(AsanHandleNoReturnFunc);
}
- DEBUG(dbgs() << "ASAN done instrumenting:\n" << F << "\n");
- return NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
+ bool res = NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
+
+ if (InjectCoverage(F))
+ res = true;
+
+ DEBUG(dbgs() << "ASAN done instrumenting: " << res << " " << F << "\n");
+
+ if (ClKeepUninstrumented) {
+ if (!res) {
+ // No instrumentation is done, no need for the duplicate.
+ if (UninstrumentedDuplicate)
+ UninstrumentedDuplicate->eraseFromParent();
+ } else {
+ // The function was instrumented. We must have the duplicate.
+ assert(UninstrumentedDuplicate);
+ UninstrumentedDuplicate->setSection("NOASAN");
+ assert(!F.hasSection());
+ F.setSection("ASAN");
+ }
+ }
+
+ return res;
}
static uint64_t ValueForPoison(uint64_t PoisonByte, size_t ShadowRedzoneSize) {
@@ -1217,11 +1363,15 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
void FunctionStackPoisoner::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
- AsanStackMallocFunc = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL));
- AsanStackFreeFunc = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanStackFreeName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, IntptrTy, NULL));
+ for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) {
+ std::string Suffix = itostr(i);
+ AsanStackMallocFunc[i] = checkInterfaceFunction(
+ M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy,
+ IntptrTy, IntptrTy, NULL));
+ AsanStackFreeFunc[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanStackFreeNameTemplate + Suffix, IRB.getVoidTy(), IntptrTy,
+ IntptrTy, IntptrTy, NULL));
+ }
AsanPoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction(
kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
AsanUnpoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction(
@@ -1229,7 +1379,7 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
}
void FunctionStackPoisoner::poisonRedZones(
- const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, Value *ShadowBase,
+ const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> &IRB, Value *ShadowBase,
bool DoPoison) {
size_t ShadowRZSize = RedzoneSize() >> Mapping.Scale;
assert(ShadowRZSize >= 1 && ShadowRZSize <= 4);
@@ -1270,6 +1420,10 @@ void FunctionStackPoisoner::poisonRedZones(
RedzoneSize(),
1ULL << Mapping.Scale,
kAsanStackPartialRedzoneMagic);
+ Poison =
+ ASan.TD->isLittleEndian()
+ ? support::endian::byte_swap<uint32_t, support::little>(Poison)
+ : support::endian::byte_swap<uint32_t, support::big>(Poison);
}
Value *PartialPoison = ConstantInt::get(RZTy, Poison);
IRB.CreateStore(PartialPoison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
@@ -1286,12 +1440,40 @@ void FunctionStackPoisoner::poisonRedZones(
}
}
+// The fake stack allocator (asan_fake_stack.h) has 11 size classes, one for
+// every power of 2 from kMinStackMallocSize up to kMaxStackMallocSize.
+static int StackMallocSizeClass(uint64_t LocalStackSize) {
+ assert(LocalStackSize <= kMaxStackMallocSize);
+ uint64_t MaxSize = kMinStackMallocSize;
+ for (int i = 0; ; i++, MaxSize *= 2)
+ if (LocalStackSize <= MaxSize)
+ return i;
+ llvm_unreachable("impossible LocalStackSize");
+}
+
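A quick model of the mapping, assuming kMinStackMallocSize = 64: sizes up to
64 map to class 0, up to 128 to class 1, and so on up to 64K at class 10.

    #include <cassert>

    static int StackMallocSizeClassModel(unsigned long long LocalStackSize) {
      unsigned long long MaxSize = 64;       // kMinStackMallocSize
      for (int i = 0;; i++, MaxSize *= 2)
        if (LocalStackSize <= MaxSize)
          return i;
    }

    int main() {
      assert(StackMallocSizeClassModel(64) == 0);
      assert(StackMallocSizeClassModel(100) == 1);
      assert(StackMallocSizeClassModel(1 << 16) == 10);  // kMaxStackMallocSize
      return 0;
    }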
+// Set Size bytes starting from ShadowBase to kAsanStackAfterReturnMagic.
+// We cannot use the memset intrinsic because it may end up calling the
+// actual memset. Size is a multiple of 8.
+// Currently this generates 8-byte stores on x86_64; it may be better to
+// generate wider stores.
+void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined(
+ IRBuilder<> &IRB, Value *ShadowBase, int Size) {
+ assert(!(Size % 8));
+ assert(kAsanStackAfterReturnMagic == 0xf5);
+ for (int i = 0; i < Size; i += 8) {
+ Value *p = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i));
+ IRB.CreateStore(ConstantInt::get(IRB.getInt64Ty(), 0xf5f5f5f5f5f5f5f5ULL),
+ IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo()));
+ }
+}
+
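What the inlined poisoning amounts to, modeled in C++ with memcpy standing in
for the emitted 8-byte stores (sketch only, not the pass's code):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    void SetShadowModel(unsigned char *ShadowBase, int Size) {
      assert(Size % 8 == 0);                           // Size is a multiple of 8
      const uint64_t Magic = 0xf5f5f5f5f5f5f5f5ULL;    // 8 x 0xf5 (after-return)
      for (int i = 0; i < Size; i += 8)
        std::memcpy(ShadowBase + i, &Magic, 8);        // one 8-byte store
    }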
void FunctionStackPoisoner::poisonStack() {
uint64_t LocalStackSize = TotalStackSize +
(AllocaVec.size() + 1) * RedzoneSize();
bool DoStackMalloc = ASan.CheckUseAfterReturn
&& LocalStackSize <= kMaxStackMallocSize;
+ int StackMallocIdx = -1;
assert(AllocaVec.size() > 0);
Instruction *InsBefore = AllocaVec[0];
@@ -1309,8 +1491,28 @@ void FunctionStackPoisoner::poisonStack() {
Value *LocalStackBase = OrigStackBase;
if (DoStackMalloc) {
- LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc,
+ // LocalStackBase = OrigStackBase
+ // if (__asan_option_detect_stack_use_after_return)
+    //   LocalStackBase = __asan_stack_malloc_N(LocalStackSize, OrigStackBase);
+ StackMallocIdx = StackMallocSizeClass(LocalStackSize);
+ assert(StackMallocIdx <= kMaxAsanStackMallocSizeClass);
+ Constant *OptionDetectUAR = F.getParent()->getOrInsertGlobal(
+ kAsanOptionDetectUAR, IRB.getInt32Ty());
+ Value *Cmp = IRB.CreateICmpNE(IRB.CreateLoad(OptionDetectUAR),
+ Constant::getNullValue(IRB.getInt32Ty()));
+ Instruction *Term =
+ SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+ BasicBlock *CmpBlock = cast<Instruction>(Cmp)->getParent();
+ IRBuilder<> IRBIf(Term);
+ LocalStackBase = IRBIf.CreateCall2(
+ AsanStackMallocFunc[StackMallocIdx],
ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
+ BasicBlock *SetBlock = cast<Instruction>(LocalStackBase)->getParent();
+ IRB.SetInsertPoint(InsBefore);
+ PHINode *Phi = IRB.CreatePHI(IntptrTy, 2);
+ Phi->addIncoming(OrigStackBase, CmpBlock);
+ Phi->addIncoming(LocalStackBase, SetBlock);
+ LocalStackBase = Phi;
}
// This string will be parsed by the run-time (DescribeAddressIfStack).
@@ -1322,11 +1524,10 @@ void FunctionStackPoisoner::poisonStack() {
bool HavePoisonedAllocas = false;
for (size_t i = 0, n = AllocaPoisonCallVec.size(); i < n; i++) {
const AllocaPoisonCall &APC = AllocaPoisonCallVec[i];
- IntrinsicInst *II = APC.InsBefore;
- AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
- assert(AI);
- IRBuilder<> IRB(II);
- poisonAlloca(AI, APC.Size, IRB, APC.DoPoison);
+ assert(APC.InsBefore);
+ assert(APC.AI);
+ IRBuilder<> IRB(APC.InsBefore);
+ poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison);
HavePoisonedAllocas |= APC.DoPoison;
}
@@ -1384,10 +1585,35 @@ void FunctionStackPoisoner::poisonStack() {
// Unpoison the stack.
poisonRedZones(AllocaVec, IRBRet, ShadowBase, false);
if (DoStackMalloc) {
+ assert(StackMallocIdx >= 0);
// In use-after-return mode, mark the whole stack frame unaddressable.
- IRBRet.CreateCall3(AsanStackFreeFunc, LocalStackBase,
- ConstantInt::get(IntptrTy, LocalStackSize),
- OrigStackBase);
+ if (StackMallocIdx <= 4) {
+ // For small sizes inline the whole thing:
+ // if LocalStackBase != OrigStackBase:
+ // memset(ShadowBase, kAsanStackAfterReturnMagic, ShadowSize);
+ // **SavedFlagPtr(LocalStackBase) = 0
+ // FIXME: if LocalStackBase != OrigStackBase don't call poisonRedZones.
+ Value *Cmp = IRBRet.CreateICmpNE(LocalStackBase, OrigStackBase);
+ TerminatorInst *PoisonTerm =
+ SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+ IRBuilder<> IRBPoison(PoisonTerm);
+ int ClassSize = kMinStackMallocSize << StackMallocIdx;
+ SetShadowToStackAfterReturnInlined(IRBPoison, ShadowBase,
+ ClassSize >> Mapping.Scale);
+ Value *SavedFlagPtrPtr = IRBPoison.CreateAdd(
+ LocalStackBase,
+ ConstantInt::get(IntptrTy, ClassSize - ASan.LongSize / 8));
+ Value *SavedFlagPtr = IRBPoison.CreateLoad(
+ IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy));
+ IRBPoison.CreateStore(
+ Constant::getNullValue(IRBPoison.getInt8Ty()),
+ IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy()));
+ } else {
+ // For larger frames call __asan_stack_free_*.
+ IRBRet.CreateCall3(AsanStackFreeFunc[StackMallocIdx], LocalStackBase,
+ ConstantInt::get(IntptrTy, LocalStackSize),
+ OrigStackBase);
+ }
} else if (HavePoisonedAllocas) {
// If we poisoned some allocas in llvm.lifetime analysis,
// unpoison whole stack frame now.
@@ -1402,7 +1628,7 @@ void FunctionStackPoisoner::poisonStack() {
}
void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
- IRBuilder<> IRB, bool DoPoison) {
+ IRBuilder<> &IRB, bool DoPoison) {
// For now just insert the call to ASan runtime.
Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy);
Value *SizeArg = ConstantInt::get(IntptrTy, Size);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp
deleted file mode 100644
index 39de4b0..0000000
--- a/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-//===-- BlackList.cpp - blacklist for sanitizers --------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is a utility class for instrumentation passes (like AddressSanitizer
-// or ThreadSanitizer) to avoid instrumenting some functions or global
-// variables based on a user-supplied blacklist.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/BlackList.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Regex.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-#include <string>
-#include <utility>
-
-namespace llvm {
-
-BlackList::BlackList(const StringRef Path) {
- // Validate and open blacklist file.
- if (Path.empty()) return;
- OwningPtr<MemoryBuffer> File;
- if (error_code EC = MemoryBuffer::getFile(Path, File)) {
- report_fatal_error("Can't open blacklist file: " + Path + ": " +
- EC.message());
- }
-
- // Iterate through each line in the blacklist file.
- SmallVector<StringRef, 16> Lines;
- SplitString(File.take()->getBuffer(), Lines, "\n\r");
- StringMap<std::string> Regexps;
- for (SmallVector<StringRef, 16>::iterator I = Lines.begin(), E = Lines.end();
- I != E; ++I) {
- // Ignore empty lines and lines starting with "#"
- if (I->empty() || I->startswith("#"))
- continue;
- // Get our prefix and unparsed regexp.
- std::pair<StringRef, StringRef> SplitLine = I->split(":");
- StringRef Prefix = SplitLine.first;
- std::string Regexp = SplitLine.second;
- if (Regexp.empty()) {
- // Missing ':' in the line.
- report_fatal_error("malformed blacklist line: " + SplitLine.first);
- }
-
- // Replace * with .*
- for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
- pos += strlen(".*")) {
- Regexp.replace(pos, strlen("*"), ".*");
- }
-
- // Check that the regexp is valid.
- Regex CheckRE(Regexp);
- std::string Error;
- if (!CheckRE.isValid(Error)) {
- report_fatal_error("malformed blacklist regex: " + SplitLine.second +
- ": " + Error);
- }
-
- // Add this regexp into the proper group by its prefix.
- if (!Regexps[Prefix].empty())
- Regexps[Prefix] += "|";
- Regexps[Prefix] += Regexp;
- }
-
- // Iterate through each of the prefixes, and create Regexs for them.
- for (StringMap<std::string>::const_iterator I = Regexps.begin(),
- E = Regexps.end(); I != E; ++I) {
- Entries[I->getKey()] = new Regex(I->getValue());
- }
-}
-
-bool BlackList::isIn(const Function &F) const {
- return isIn(*F.getParent()) || inSection("fun", F.getName());
-}
-
-bool BlackList::isIn(const GlobalVariable &G) const {
- return isIn(*G.getParent()) || inSection("global", G.getName());
-}
-
-bool BlackList::isIn(const Module &M) const {
- return inSection("src", M.getModuleIdentifier());
-}
-
-static StringRef GetGVTypeString(const GlobalVariable &G) {
- // Types of GlobalVariables are always pointer types.
- Type *GType = G.getType()->getElementType();
- // For now we support blacklisting struct types only.
- if (StructType *SGType = dyn_cast<StructType>(GType)) {
- if (!SGType->isLiteral())
- return SGType->getName();
- }
- return "<unknown type>";
-}
-
-bool BlackList::isInInit(const GlobalVariable &G) const {
- return (isIn(*G.getParent()) ||
- inSection("global-init", G.getName()) ||
- inSection("global-init-type", GetGVTypeString(G)) ||
- inSection("global-init-src", G.getParent()->getModuleIdentifier()));
-}
-
-bool BlackList::inSection(const StringRef Section,
- const StringRef Query) const {
- StringMap<Regex*>::const_iterator I = Entries.find(Section);
- if (I == Entries.end()) return false;
-
- Regex *FunctionRegex = I->getValue();
- return FunctionRegex->match(Query);
-}
-
-} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index b094d42..7a9f0f6 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -80,7 +80,7 @@ BasicBlock *BoundsChecking::getTrapBB() {
return TrapBB;
Function *Fn = Inst->getParent()->getParent();
- BasicBlock::iterator PrevInsertPoint = Builder->GetInsertPoint();
+ IRBuilder<>::InsertPointGuard Guard(*Builder);
TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
Builder->SetInsertPoint(TrapBB);
@@ -91,7 +91,6 @@ BasicBlock *BoundsChecking::getTrapBB() {
TrapCall->setDebugLoc(Inst->getDebugLoc());
Builder->CreateUnreachable();
- Builder->SetInsertPoint(PrevInsertPoint);
return TrapBB;
}
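The two-line change above replaces manual saving and restoring of the builder's insertion point with an RAII guard. A minimal sketch of the idiom, under the IRBuilder API of this LLVM vintage:

    {
      IRBuilder<>::InsertPointGuard Guard(*Builder); // records insert point
      Builder->SetInsertPoint(TrapBB);               // emit into the trap block
      // ... create the trap call and the unreachable terminator ...
    } // Guard's destructor restores the original insertion point here

This is what makes the PrevInsertPoint bookkeeping deleted above unnecessary.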
@@ -173,7 +172,8 @@ bool BoundsChecking::runOnFunction(Function &F) {
TrapBB = 0;
BuilderTy TheBuilder(F.getContext(), TargetFolder(TD));
Builder = &TheBuilder;
- ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext());
+ ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext(),
+ /*RoundToAlign=*/true);
ObjSizeEval = &TheObjSizeEval;
// check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
new file mode 100644
index 0000000..9b9e725
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -0,0 +1,1397 @@
+//===-- DataFlowSanitizer.cpp - dynamic data flow analysis ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
+/// analysis.
+///
+/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
+/// class of bugs on its own. Instead, it provides a generic dynamic data flow
+/// analysis framework to be used by clients to help detect application-specific
+/// issues within their own code.
+///
+/// The analysis is based on automatic propagation of data flow labels (also
+/// known as taint labels) through a program as it performs computation. Each
+/// byte of application memory is backed by two bytes of shadow memory which
+/// hold the label. On Linux/x86_64, memory is laid out as follows:
+///
+/// +--------------------+ 0x800000000000 (top of memory)
+/// | application memory |
+/// +--------------------+ 0x700000008000 (kAppAddr)
+/// | |
+/// | unused |
+/// | |
+/// +--------------------+ 0x200200000000 (kUnusedAddr)
+/// | union table |
+/// +--------------------+ 0x200000000000 (kUnionTableAddr)
+/// | shadow memory |
+/// +--------------------+ 0x000000010000 (kShadowAddr)
+/// | reserved by kernel |
+/// +--------------------+ 0x000000000000
+///
+/// To derive a shadow memory address from an application memory address,
+/// bits 44-46 are cleared to bring the address into the range
+/// [0x000000008000,0x100000000000). Then the address is shifted left by 1 to
+/// account for the double byte representation of shadow labels and move the
+/// address into the shadow memory range. See the function
+/// DataFlowSanitizer::getShadowAddress below.
+///
+/// For more information, please refer to the design document:
+/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
+
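As a hedged illustration of the mapping just described (Linux/x86_64, 16-bit labels), a hypothetical helper that mirrors what getShadowAddress emits as IR:

    #include <cstdint> // for the sketch only; not part of the pass
    // Clearing bits 44-46 brings the address into
    // [0x000000008000, 0x100000000000); multiplying by 2 accounts for the
    // two shadow bytes backing each application byte.
    static uint16_t *shadowFor(uintptr_t AppAddr) {
      return (uint16_t *)((AppAddr & ~0x700000000000ULL) * 2);
    }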
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SpecialCaseList.h"
+#include <iterator>
+
+using namespace llvm;
+
+// The -dfsan-preserve-alignment flag controls whether this pass assumes that
+// alignment requirements provided by the input IR are correct. For example,
+// if the input IR contains a load with alignment 8, this flag will cause
+// the shadow load to have alignment 16. This flag is disabled by default as
+// we have unfortunately encountered too much code (including Clang itself;
+// see PR14291) which performs misaligned access.
+static cl::opt<bool> ClPreserveAlignment(
+ "dfsan-preserve-alignment",
+ cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
+ cl::init(false));
+
+// The ABI list file controls how shadow parameters are passed. The pass treats
+// every function labelled "uninstrumented" in the ABI list file as conforming
+// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
+// additional annotations for those functions, a call to one of those functions
+// will produce a warning message, as the labelling behaviour of the function is
+// unknown. The other supported annotations are "functional" and "discard",
+// which are described below under DataFlowSanitizer::WrapperKind.
+static cl::opt<std::string> ClABIListFile(
+ "dfsan-abilist",
+ cl::desc("File listing native ABI functions and how the pass treats them"),
+ cl::Hidden);
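A sketch of the file format (function names illustrative): each entry uses the SpecialCaseList form prefix:name=category, typically pairing an "uninstrumented" entry with one of the wrapper kinds described below:

    fun:memcmp=uninstrumented
    fun:memcmp=functional
    fun:getenv=uninstrumented
    fun:getenv=discard
    fun:mylib_read=uninstrumented
    fun:mylib_read=custom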
+
+// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented
+// functions (see DataFlowSanitizer::InstrumentedABI below).
+static cl::opt<bool> ClArgsABI(
+ "dfsan-args-abi",
+ cl::desc("Use the argument ABI rather than the TLS ABI"),
+ cl::Hidden);
+
+static cl::opt<bool> ClDebugNonzeroLabels(
+ "dfsan-debug-nonzero-labels",
+ cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
+ "load or return with a nonzero label"),
+ cl::Hidden);
+
+namespace {
+
+class DataFlowSanitizer : public ModulePass {
+ friend struct DFSanFunction;
+ friend class DFSanVisitor;
+
+ enum {
+ ShadowWidth = 16
+ };
+
+ /// Which ABI should be used for instrumented functions?
+ enum InstrumentedABI {
+ /// Argument and return value labels are passed through additional
+ /// arguments and by modifying the return type.
+ IA_Args,
+
+ /// Argument and return value labels are passed through TLS variables
+ /// __dfsan_arg_tls and __dfsan_retval_tls.
+ IA_TLS
+ };
+
+ /// How should calls to uninstrumented functions be handled?
+ enum WrapperKind {
+ /// This function is present in an uninstrumented form but we don't know
+ /// how it should be handled. Print a warning and call the function anyway.
+ /// Don't label the return value.
+ WK_Warning,
+
+ /// This function does not write to (user-accessible) memory, and its return
+ /// value is unlabelled.
+ WK_Discard,
+
+ /// This function does not write to (user-accessible) memory, and the label
+ /// of its return value is the union of the label of its arguments.
+ WK_Functional,
+
+ /// Instead of calling the function, a custom wrapper __dfsw_F is called,
+ /// where F is the name of the function. This function may wrap the
+ /// original function or provide its own implementation. This is similar to
+ /// the IA_Args ABI, except that IA_Args uses a struct return type to
+ /// pass the return value shadow in a register, while WK_Custom uses an
+ /// extra pointer argument to return the shadow. This allows the wrapped
+ /// form of the function type to be expressed in C.
+ WK_Custom
+ };
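To make WK_Custom concrete (signature illustrative only, with dfsan_label standing in for the 16-bit shadow type), a call to int f(int) would be redirected to a wrapper expressible in C as

    int __dfsw_f(int x, dfsan_label x_label, dfsan_label *ret_label);

where the extra pointer argument returns the shadow of the return value.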
+
+ DataLayout *DL;
+ Module *Mod;
+ LLVMContext *Ctx;
+ IntegerType *ShadowTy;
+ PointerType *ShadowPtrTy;
+ IntegerType *IntptrTy;
+ ConstantInt *ZeroShadow;
+ ConstantInt *ShadowPtrMask;
+ ConstantInt *ShadowPtrMul;
+ Constant *ArgTLS;
+ Constant *RetvalTLS;
+ void *(*GetArgTLSPtr)();
+ void *(*GetRetvalTLSPtr)();
+ Constant *GetArgTLS;
+ Constant *GetRetvalTLS;
+ FunctionType *DFSanUnionFnTy;
+ FunctionType *DFSanUnionLoadFnTy;
+ FunctionType *DFSanUnimplementedFnTy;
+ FunctionType *DFSanSetLabelFnTy;
+ FunctionType *DFSanNonzeroLabelFnTy;
+ Constant *DFSanUnionFn;
+ Constant *DFSanUnionLoadFn;
+ Constant *DFSanUnimplementedFn;
+ Constant *DFSanSetLabelFn;
+ Constant *DFSanNonzeroLabelFn;
+ MDNode *ColdCallWeights;
+ OwningPtr<SpecialCaseList> ABIList;
+ DenseMap<Value *, Function *> UnwrappedFnMap;
+ AttributeSet ReadOnlyNoneAttrs;
+
+ Value *getShadowAddress(Value *Addr, Instruction *Pos);
+ Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
+ bool isInstrumented(const Function *F);
+ bool isInstrumented(const GlobalAlias *GA);
+ FunctionType *getArgsFunctionType(FunctionType *T);
+ FunctionType *getTrampolineFunctionType(FunctionType *T);
+ FunctionType *getCustomFunctionType(FunctionType *T);
+ InstrumentedABI getInstrumentedABI();
+ WrapperKind getWrapperKind(Function *F);
+ void addGlobalNamePrefix(GlobalValue *GV);
+ Function *buildWrapperFunction(Function *F, StringRef NewFName,
+ GlobalValue::LinkageTypes NewFLink,
+ FunctionType *NewFT);
+ Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
+
+ public:
+ DataFlowSanitizer(StringRef ABIListFile = StringRef(),
+ void *(*getArgTLS)() = 0, void *(*getRetValTLS)() = 0);
+ static char ID;
+ bool doInitialization(Module &M);
+ bool runOnModule(Module &M);
+};
+
+struct DFSanFunction {
+ DataFlowSanitizer &DFS;
+ Function *F;
+ DataFlowSanitizer::InstrumentedABI IA;
+ bool IsNativeABI;
+ Value *ArgTLSPtr;
+ Value *RetvalTLSPtr;
+ AllocaInst *LabelReturnAlloca;
+ DenseMap<Value *, Value *> ValShadowMap;
+ DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
+ std::vector<std::pair<PHINode *, PHINode *> > PHIFixups;
+ DenseSet<Instruction *> SkipInsts;
+ DenseSet<Value *> NonZeroChecks;
+
+ DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
+ : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()),
+ IsNativeABI(IsNativeABI), ArgTLSPtr(0), RetvalTLSPtr(0),
+ LabelReturnAlloca(0) {}
+ Value *getArgTLSPtr();
+ Value *getArgTLS(unsigned Index, Instruction *Pos);
+ Value *getRetvalTLS();
+ Value *getShadow(Value *V);
+ void setShadow(Instruction *I, Value *Shadow);
+ Value *combineOperandShadows(Instruction *Inst);
+ Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align,
+ Instruction *Pos);
+ void storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *Shadow,
+ Instruction *Pos);
+};
+
+class DFSanVisitor : public InstVisitor<DFSanVisitor> {
+ public:
+ DFSanFunction &DFSF;
+ DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}
+
+ void visitOperandShadowInst(Instruction &I);
+
+ void visitBinaryOperator(BinaryOperator &BO);
+ void visitCastInst(CastInst &CI);
+ void visitCmpInst(CmpInst &CI);
+ void visitGetElementPtrInst(GetElementPtrInst &GEPI);
+ void visitLoadInst(LoadInst &LI);
+ void visitStoreInst(StoreInst &SI);
+ void visitReturnInst(ReturnInst &RI);
+ void visitCallSite(CallSite CS);
+ void visitPHINode(PHINode &PN);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &I);
+ void visitExtractValueInst(ExtractValueInst &I);
+ void visitInsertValueInst(InsertValueInst &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitMemSetInst(MemSetInst &I);
+ void visitMemTransferInst(MemTransferInst &I);
+};
+
+}
+
+char DataFlowSanitizer::ID;
+INITIALIZE_PASS(DataFlowSanitizer, "dfsan",
+ "DataFlowSanitizer: dynamic data flow analysis.", false, false)
+
+ModulePass *llvm::createDataFlowSanitizerPass(StringRef ABIListFile,
+ void *(*getArgTLS)(),
+ void *(*getRetValTLS)()) {
+ return new DataFlowSanitizer(ABIListFile, getArgTLS, getRetValTLS);
+}
+
+DataFlowSanitizer::DataFlowSanitizer(StringRef ABIListFile,
+ void *(*getArgTLS)(),
+ void *(*getRetValTLS)())
+ : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS),
+ ABIList(SpecialCaseList::createOrDie(ABIListFile.empty() ? ClABIListFile
+ : ABIListFile)) {
+}
+
+FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
+ llvm::SmallVector<Type *, 4> ArgTypes;
+ std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes));
+ for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
+ ArgTypes.push_back(ShadowTy);
+ if (T->isVarArg())
+ ArgTypes.push_back(ShadowPtrTy);
+ Type *RetType = T->getReturnType();
+ if (!RetType->isVoidTy())
+ RetType = StructType::get(RetType, ShadowTy, (Type *)0);
+ return FunctionType::get(RetType, ArgTypes, T->isVarArg());
+}
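For instance (illustrative, with ShadowWidth == 16), the transformation above maps i32 (i32, i8*) to { i32, i16 } (i32, i8*, i16, i16); a vararg function type additionally gains a trailing i16* through which the shadows of the variadic arguments are passed.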
+
+FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
+ assert(!T->isVarArg());
+ llvm::SmallVector<Type *, 4> ArgTypes;
+ ArgTypes.push_back(T->getPointerTo());
+ std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes));
+ for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
+ ArgTypes.push_back(ShadowTy);
+ Type *RetType = T->getReturnType();
+ if (!RetType->isVoidTy())
+ ArgTypes.push_back(ShadowPtrTy);
+ return FunctionType::get(T->getReturnType(), ArgTypes, false);
+}
+
+FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
+ assert(!T->isVarArg());
+ llvm::SmallVector<Type *, 4> ArgTypes;
+ for (FunctionType::param_iterator i = T->param_begin(), e = T->param_end();
+ i != e; ++i) {
+ FunctionType *FT;
+ if (isa<PointerType>(*i) && (FT = dyn_cast<FunctionType>(cast<PointerType>(
+ *i)->getElementType()))) {
+ ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo());
+ ArgTypes.push_back(Type::getInt8PtrTy(*Ctx));
+ } else {
+ ArgTypes.push_back(*i);
+ }
+ }
+ for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
+ ArgTypes.push_back(ShadowTy);
+ Type *RetType = T->getReturnType();
+ if (!RetType->isVoidTy())
+ ArgTypes.push_back(ShadowPtrTy);
+ return FunctionType::get(T->getReturnType(), ArgTypes, false);
+}
+
+bool DataFlowSanitizer::doInitialization(Module &M) {
+ DL = getAnalysisIfAvailable<DataLayout>();
+ if (!DL)
+ return false;
+
+ Mod = &M;
+ Ctx = &M.getContext();
+ ShadowTy = IntegerType::get(*Ctx, ShadowWidth);
+ ShadowPtrTy = PointerType::getUnqual(ShadowTy);
+ IntptrTy = DL->getIntPtrType(*Ctx);
+ ZeroShadow = ConstantInt::getSigned(ShadowTy, 0);
+ ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL);
+ ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8);
+
+ Type *DFSanUnionArgs[2] = { ShadowTy, ShadowTy };
+ DFSanUnionFnTy =
+ FunctionType::get(ShadowTy, DFSanUnionArgs, /*isVarArg=*/ false);
+ Type *DFSanUnionLoadArgs[2] = { ShadowPtrTy, IntptrTy };
+ DFSanUnionLoadFnTy =
+ FunctionType::get(ShadowTy, DFSanUnionLoadArgs, /*isVarArg=*/ false);
+ DFSanUnimplementedFnTy = FunctionType::get(
+ Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
+ Type *DFSanSetLabelArgs[3] = { ShadowTy, Type::getInt8PtrTy(*Ctx), IntptrTy };
+ DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ DFSanSetLabelArgs, /*isVarArg=*/false);
+ DFSanNonzeroLabelFnTy = FunctionType::get(
+ Type::getVoidTy(*Ctx), ArrayRef<Type *>(), /*isVarArg=*/false);
+
+ if (GetArgTLSPtr) {
+ Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
+ ArgTLS = 0;
+ GetArgTLS = ConstantExpr::getIntToPtr(
+ ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)),
+ PointerType::getUnqual(
+ FunctionType::get(PointerType::getUnqual(ArgTLSTy), (Type *)0)));
+ }
+ if (GetRetvalTLSPtr) {
+ RetvalTLS = 0;
+ GetRetvalTLS = ConstantExpr::getIntToPtr(
+ ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)),
+ PointerType::getUnqual(
+ FunctionType::get(PointerType::getUnqual(ShadowTy), (Type *)0)));
+ }
+
+ ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
+ return true;
+}
+
+bool DataFlowSanitizer::isInstrumented(const Function *F) {
+ return !ABIList->isIn(*F, "uninstrumented");
+}
+
+bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
+ return !ABIList->isIn(*GA, "uninstrumented");
+}
+
+DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
+ return ClArgsABI ? IA_Args : IA_TLS;
+}
+
+DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
+ if (ABIList->isIn(*F, "functional"))
+ return WK_Functional;
+ if (ABIList->isIn(*F, "discard"))
+ return WK_Discard;
+ if (ABIList->isIn(*F, "custom"))
+ return WK_Custom;
+
+ return WK_Warning;
+}
+
+void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) {
+ std::string GVName = GV->getName(), Prefix = "dfs$";
+ GV->setName(Prefix + GVName);
+
+ // Try to change the name of the function in module inline asm. We only do
+ // this for specific asm directives, currently only ".symver", to try to avoid
+ // corrupting asm which happens to contain the symbol name as a substring.
+ // Note that the substitution for .symver assumes that the versioned symbol
+ // also has an instrumented name.
+ std::string Asm = GV->getParent()->getModuleInlineAsm();
+ std::string SearchStr = ".symver " + GVName + ",";
+ size_t Pos = Asm.find(SearchStr);
+ if (Pos != std::string::npos) {
+ Asm.replace(Pos, SearchStr.size(),
+ ".symver " + Prefix + GVName + "," + Prefix);
+ GV->getParent()->setModuleInlineAsm(Asm);
+ }
+}
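Concretely (symbol names illustrative): a module-level .symver foo,foo@@V1 directive becomes .symver dfs$foo,dfs$foo@@V1, since the replacement prefixes both the symbol and the start of the versioned name.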
+
+Function *
+DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
+ GlobalValue::LinkageTypes NewFLink,
+ FunctionType *NewFT) {
+ FunctionType *FT = F->getFunctionType();
+ Function *NewF = Function::Create(NewFT, NewFLink, NewFName,
+ F->getParent());
+ NewF->copyAttributesFrom(F);
+ NewF->removeAttributes(
+ AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewFT->getReturnType(),
+ AttributeSet::ReturnIndex));
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
+ std::vector<Value *> Args;
+ unsigned n = FT->getNumParams();
+ for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n)
+ Args.push_back(&*ai);
+ CallInst *CI = CallInst::Create(F, Args, "", BB);
+ if (FT->getReturnType()->isVoidTy())
+ ReturnInst::Create(*Ctx, BB);
+ else
+ ReturnInst::Create(*Ctx, CI, BB);
+
+ return NewF;
+}
+
+Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
+ StringRef FName) {
+ FunctionType *FTT = getTrampolineFunctionType(FT);
+ Constant *C = Mod->getOrInsertFunction(FName, FTT);
+ Function *F = dyn_cast<Function>(C);
+ if (F && F->isDeclaration()) {
+ F->setLinkage(GlobalValue::LinkOnceODRLinkage);
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+ std::vector<Value *> Args;
+ Function::arg_iterator AI = F->arg_begin(); ++AI;
+ for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
+ Args.push_back(&*AI);
+ CallInst *CI =
+ CallInst::Create(&F->getArgumentList().front(), Args, "", BB);
+ ReturnInst *RI;
+ if (FT->getReturnType()->isVoidTy())
+ RI = ReturnInst::Create(*Ctx, BB);
+ else
+ RI = ReturnInst::Create(*Ctx, CI, BB);
+
+ DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
+ Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI;
+ for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N)
+ DFSF.ValShadowMap[ValAI] = ShadowAI;
+ DFSanVisitor(DFSF).visitCallInst(*CI);
+ if (!FT->getReturnType()->isVoidTy())
+ new StoreInst(DFSF.getShadow(RI->getReturnValue()),
+ &F->getArgumentList().back(), RI);
+ }
+
+ return C;
+}
+
+bool DataFlowSanitizer::runOnModule(Module &M) {
+ if (!DL)
+ return false;
+
+ if (ABIList->isIn(M, "skip"))
+ return false;
+
+ if (!GetArgTLSPtr) {
+ Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
+ ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy);
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(ArgTLS))
+ G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
+ }
+ if (!GetRetvalTLSPtr) {
+ RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", ShadowTy);
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(RetvalTLS))
+ G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
+ }
+
+ DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy);
+ if (Function *F = dyn_cast<Function>(DFSanUnionFn)) {
+ F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ F->addAttribute(1, Attribute::ZExt);
+ F->addAttribute(2, Attribute::ZExt);
+ }
+ DFSanUnionLoadFn =
+ Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy);
+ if (Function *F = dyn_cast<Function>(DFSanUnionLoadFn)) {
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ }
+ DFSanUnimplementedFn =
+ Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
+ DFSanSetLabelFn =
+ Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy);
+ if (Function *F = dyn_cast<Function>(DFSanSetLabelFn)) {
+ F->addAttribute(1, Attribute::ZExt);
+ }
+ DFSanNonzeroLabelFn =
+ Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
+
+ std::vector<Function *> FnsToInstrument;
+ llvm::SmallPtrSet<Function *, 2> FnsWithNativeABI;
+ for (Module::iterator i = M.begin(), e = M.end(); i != e; ++i) {
+ if (!i->isIntrinsic() &&
+ i != DFSanUnionFn &&
+ i != DFSanUnionLoadFn &&
+ i != DFSanUnimplementedFn &&
+ i != DFSanSetLabelFn &&
+ i != DFSanNonzeroLabelFn)
+ FnsToInstrument.push_back(&*i);
+ }
+
+ // Give function aliases prefixes when necessary, and build wrappers where the
+ // instrumentedness is inconsistent.
+ for (Module::alias_iterator i = M.alias_begin(), e = M.alias_end(); i != e;) {
+ GlobalAlias *GA = &*i;
+ ++i;
+ // Don't stop on weak. We assume people aren't playing games with the
+ // instrumentedness of overridden weak aliases.
+ if (Function *F = dyn_cast<Function>(
+ GA->resolveAliasedGlobal(/*stopOnWeak=*/false))) {
+ bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
+ if (GAInst && FInst) {
+ addGlobalNamePrefix(GA);
+ } else if (GAInst != FInst) {
+ // Non-instrumented alias of an instrumented function, or vice versa.
+ // Replace the alias with a native-ABI wrapper of the aliasee. The pass
+ // below will take care of instrumenting it.
+ Function *NewF =
+ buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType());
+ GA->replaceAllUsesWith(NewF);
+ NewF->takeName(GA);
+ GA->eraseFromParent();
+ FnsToInstrument.push_back(NewF);
+ }
+ }
+ }
+
+ AttrBuilder B;
+ B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
+ ReadOnlyNoneAttrs = AttributeSet::get(*Ctx, AttributeSet::FunctionIndex, B);
+
+ // First, change the ABI of every function in the module. ABI-listed
+ // functions keep their original ABI and get a wrapper function.
+ for (std::vector<Function *>::iterator i = FnsToInstrument.begin(),
+ e = FnsToInstrument.end();
+ i != e; ++i) {
+ Function &F = **i;
+ FunctionType *FT = F.getFunctionType();
+
+ bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
+ FT->getReturnType()->isVoidTy());
+
+ if (isInstrumented(&F)) {
+ // Instrumented functions get a 'dfs$' prefix. This allows us to more
+ // easily identify cases of mismatching ABIs.
+ if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) {
+ FunctionType *NewFT = getArgsFunctionType(FT);
+ Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M);
+ NewF->copyAttributesFrom(&F);
+ NewF->removeAttributes(
+ AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewFT->getReturnType(),
+ AttributeSet::ReturnIndex));
+ for (Function::arg_iterator FArg = F.arg_begin(),
+ NewFArg = NewF->arg_begin(),
+ FArgEnd = F.arg_end();
+ FArg != FArgEnd; ++FArg, ++NewFArg) {
+ FArg->replaceAllUsesWith(NewFArg);
+ }
+ NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());
+
+ for (Function::use_iterator ui = F.use_begin(), ue = F.use_end();
+ ui != ue;) {
+ BlockAddress *BA = dyn_cast<BlockAddress>(ui.getUse().getUser());
+ ++ui;
+ if (BA) {
+ BA->replaceAllUsesWith(
+ BlockAddress::get(NewF, BA->getBasicBlock()));
+ delete BA;
+ }
+ }
+ F.replaceAllUsesWith(
+ ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)));
+ NewF->takeName(&F);
+ F.eraseFromParent();
+ *i = NewF;
+ addGlobalNamePrefix(NewF);
+ } else {
+ addGlobalNamePrefix(&F);
+ }
+ // Hopefully, nobody will try to indirectly call a vararg
+ // function... yet.
+ } else if (FT->isVarArg()) {
+ UnwrappedFnMap[&F] = &F;
+ *i = 0;
+ } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
+ // Build a wrapper function for F. The wrapper simply calls F, and is
+ // added to FnsToInstrument so that any instrumentation according to its
+ // WrapperKind is done in the second pass below.
+ FunctionType *NewFT = getInstrumentedABI() == IA_Args
+ ? getArgsFunctionType(FT)
+ : FT;
+ Function *NewF = buildWrapperFunction(
+ &F, std::string("dfsw$") + std::string(F.getName()),
+ GlobalValue::LinkOnceODRLinkage, NewFT);
+ if (getInstrumentedABI() == IA_TLS)
+ NewF->removeAttributes(AttributeSet::FunctionIndex, ReadOnlyNoneAttrs);
+
+ Value *WrappedFnCst =
+ ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
+ F.replaceAllUsesWith(WrappedFnCst);
+ UnwrappedFnMap[WrappedFnCst] = &F;
+ *i = NewF;
+
+ if (!F.isDeclaration()) {
+ // This function is probably defining an interposition of an
+ // uninstrumented function and hence needs to keep the original ABI.
+ // But any functions it may call need to use the instrumented ABI, so
+ // we instrument it in a mode which preserves the original ABI.
+ FnsWithNativeABI.insert(&F);
+
+ // This code needs to rebuild the iterators, as they may be invalidated
+ // by the push_back, taking care that the new range does not include
+ // any functions added by this code.
+ size_t N = i - FnsToInstrument.begin(),
+ Count = e - FnsToInstrument.begin();
+ FnsToInstrument.push_back(&F);
+ i = FnsToInstrument.begin() + N;
+ e = FnsToInstrument.begin() + Count;
+ }
+ }
+ }
+
+ for (std::vector<Function *>::iterator i = FnsToInstrument.begin(),
+ e = FnsToInstrument.end();
+ i != e; ++i) {
+ if (!*i || (*i)->isDeclaration())
+ continue;
+
+ removeUnreachableBlocks(**i);
+
+ DFSanFunction DFSF(*this, *i, FnsWithNativeABI.count(*i));
+
+ // DFSanVisitor may create new basic blocks, which confuses df_iterator.
+ // Build a copy of the list before iterating over it.
+ llvm::SmallVector<BasicBlock *, 4> BBList;
+ std::copy(df_begin(&(*i)->getEntryBlock()), df_end(&(*i)->getEntryBlock()),
+ std::back_inserter(BBList));
+
+ for (llvm::SmallVector<BasicBlock *, 4>::iterator i = BBList.begin(),
+ e = BBList.end();
+ i != e; ++i) {
+ Instruction *Inst = &(*i)->front();
+ while (1) {
+ // DFSanVisitor may split the current basic block, changing the current
+ // instruction's next pointer and moving the next instruction to the
+ // tail block from which we should continue.
+ Instruction *Next = Inst->getNextNode();
+ // DFSanVisitor may delete Inst, so keep track of whether it was a
+ // terminator.
+ bool IsTerminator = isa<TerminatorInst>(Inst);
+ if (!DFSF.SkipInsts.count(Inst))
+ DFSanVisitor(DFSF).visit(Inst);
+ if (IsTerminator)
+ break;
+ Inst = Next;
+ }
+ }
+
+ // We will not necessarily be able to compute the shadow for every phi node
+ // until we have visited every block. Therefore, the code that handles phi
+ // nodes adds them to the PHIFixups list so that they can be properly
+ // handled here.
+ for (std::vector<std::pair<PHINode *, PHINode *> >::iterator
+ i = DFSF.PHIFixups.begin(),
+ e = DFSF.PHIFixups.end();
+ i != e; ++i) {
+ for (unsigned val = 0, n = i->first->getNumIncomingValues(); val != n;
+ ++val) {
+ i->second->setIncomingValue(
+ val, DFSF.getShadow(i->first->getIncomingValue(val)));
+ }
+ }
+
+ // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
+ // places (i.e. instructions in basic blocks we haven't even begun visiting
+ // yet). To make our life easier, do this work in a pass after the main
+ // instrumentation.
+ if (ClDebugNonzeroLabels) {
+ for (DenseSet<Value *>::iterator i = DFSF.NonZeroChecks.begin(),
+ e = DFSF.NonZeroChecks.end();
+ i != e; ++i) {
+ Instruction *Pos;
+ if (Instruction *I = dyn_cast<Instruction>(*i))
+ Pos = I->getNextNode();
+ else
+ Pos = DFSF.F->getEntryBlock().begin();
+ while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
+ Pos = Pos->getNextNode();
+ IRBuilder<> IRB(Pos);
+ Instruction *NeInst = cast<Instruction>(
+ IRB.CreateICmpNE(*i, DFSF.DFS.ZeroShadow));
+ BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ NeInst, /*Unreachable=*/ false, ColdCallWeights));
+ IRBuilder<> ThenIRB(BI);
+ ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn);
+ }
+ }
+ }
+
+ return false;
+}
+
+Value *DFSanFunction::getArgTLSPtr() {
+ if (ArgTLSPtr)
+ return ArgTLSPtr;
+ if (DFS.ArgTLS)
+ return ArgTLSPtr = DFS.ArgTLS;
+
+ IRBuilder<> IRB(F->getEntryBlock().begin());
+ return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS);
+}
+
+Value *DFSanFunction::getRetvalTLS() {
+ if (RetvalTLSPtr)
+ return RetvalTLSPtr;
+ if (DFS.RetvalTLS)
+ return RetvalTLSPtr = DFS.RetvalTLS;
+
+ IRBuilder<> IRB(F->getEntryBlock().begin());
+ return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS);
+}
+
+Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) {
+ IRBuilder<> IRB(Pos);
+ return IRB.CreateConstGEP2_64(getArgTLSPtr(), 0, Idx);
+}
+
+Value *DFSanFunction::getShadow(Value *V) {
+ if (!isa<Argument>(V) && !isa<Instruction>(V))
+ return DFS.ZeroShadow;
+ Value *&Shadow = ValShadowMap[V];
+ if (!Shadow) {
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ if (IsNativeABI)
+ return DFS.ZeroShadow;
+ switch (IA) {
+ case DataFlowSanitizer::IA_TLS: {
+ Value *ArgTLSPtr = getArgTLSPtr();
+ Instruction *ArgTLSPos =
+ DFS.ArgTLS ? &*F->getEntryBlock().begin()
+ : cast<Instruction>(ArgTLSPtr)->getNextNode();
+ IRBuilder<> IRB(ArgTLSPos);
+ Shadow = IRB.CreateLoad(getArgTLS(A->getArgNo(), ArgTLSPos));
+ break;
+ }
+ case DataFlowSanitizer::IA_Args: {
+ unsigned ArgIdx = A->getArgNo() + F->getArgumentList().size() / 2;
+ Function::arg_iterator i = F->arg_begin();
+ while (ArgIdx--)
+ ++i;
+ Shadow = i;
+ assert(Shadow->getType() == DFS.ShadowTy);
+ break;
+ }
+ }
+ NonZeroChecks.insert(Shadow);
+ } else {
+ Shadow = DFS.ZeroShadow;
+ }
+ }
+ return Shadow;
+}
+
+void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
+ assert(!ValShadowMap.count(I));
+ assert(Shadow->getType() == DFS.ShadowTy);
+ ValShadowMap[I] = Shadow;
+}
+
+Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
+ assert(Addr != RetvalTLS && "Reinstrumenting?");
+ IRBuilder<> IRB(Pos);
+ return IRB.CreateIntToPtr(
+ IRB.CreateMul(
+ IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), ShadowPtrMask),
+ ShadowPtrMul),
+ ShadowPtrTy);
+}
+
+// Generates IR to compute the union of the two given shadows, inserting it
+// before Pos. Returns the computed union Value.
+Value *DataFlowSanitizer::combineShadows(Value *V1, Value *V2,
+ Instruction *Pos) {
+ if (V1 == ZeroShadow)
+ return V2;
+ if (V2 == ZeroShadow)
+ return V1;
+ if (V1 == V2)
+ return V1;
+ IRBuilder<> IRB(Pos);
+ BasicBlock *Head = Pos->getParent();
+ Value *Ne = IRB.CreateICmpNE(V1, V2);
+ Instruction *NeInst = dyn_cast<Instruction>(Ne);
+ if (NeInst) {
+ BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ NeInst, /*Unreachable=*/ false, ColdCallWeights));
+ IRBuilder<> ThenIRB(BI);
+ CallInst *Call = ThenIRB.CreateCall2(DFSanUnionFn, V1, V2);
+ Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ Call->addAttribute(1, Attribute::ZExt);
+ Call->addAttribute(2, Attribute::ZExt);
+
+ BasicBlock *Tail = BI->getSuccessor(0);
+ PHINode *Phi = PHINode::Create(ShadowTy, 2, "", Tail->begin());
+ Phi->addIncoming(Call, Call->getParent());
+ Phi->addIncoming(V1, Head);
+ Pos = Phi;
+ return Phi;
+ } else {
+ assert(0 && "todo");
+ return 0;
+ }
+}
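The semantics of the IR emitted above can be restated as a short C++ sketch (hedged; __dfsan_union is the runtime callback declared earlier, reached only on the cold path where two distinct nonzero labels meet):

    #include <cstdint> // for the sketch only
    uint16_t combine(uint16_t V1, uint16_t V2) {
      if (V1 == 0) return V2;        // zero shadow is the identity
      if (V2 == 0) return V1;
      if (V1 == V2) return V1;       // equal labels need no union
      return __dfsan_union(V1, V2);  // cold call into the runtime
    }

In the pass itself the first three cases are folded at compile time when the shadow values are known, and only the V1 != V2 test survives as the branch built around SplitBlockAndInsertIfThen.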
+
+// A convenience function which folds the shadows of each of the operands
+// of the provided instruction Inst, inserting the IR before Inst. Returns
+// the computed union Value.
+Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
+ if (Inst->getNumOperands() == 0)
+ return DFS.ZeroShadow;
+
+ Value *Shadow = getShadow(Inst->getOperand(0));
+ for (unsigned i = 1, n = Inst->getNumOperands(); i != n; ++i) {
+ Shadow = DFS.combineShadows(Shadow, getShadow(Inst->getOperand(i)), Inst);
+ }
+ return Shadow;
+}
+
+void DFSanVisitor::visitOperandShadowInst(Instruction &I) {
+ Value *CombinedShadow = DFSF.combineOperandShadows(&I);
+ DFSF.setShadow(&I, CombinedShadow);
+}
+
+// Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where
+// Addr has alignment Align, and takes the union of those shadows.
+Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
+ Instruction *Pos) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
+ llvm::DenseMap<AllocaInst *, AllocaInst *>::iterator i =
+ AllocaShadowMap.find(AI);
+ if (i != AllocaShadowMap.end()) {
+ IRBuilder<> IRB(Pos);
+ return IRB.CreateLoad(i->second);
+ }
+ }
+
+ uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
+ SmallVector<Value *, 2> Objs;
+ GetUnderlyingObjects(Addr, Objs, DFS.DL);
+ bool AllConstants = true;
+ for (SmallVector<Value *, 2>::iterator i = Objs.begin(), e = Objs.end();
+ i != e; ++i) {
+ if (isa<Function>(*i) || isa<BlockAddress>(*i))
+ continue;
+ if (isa<GlobalVariable>(*i) && cast<GlobalVariable>(*i)->isConstant())
+ continue;
+
+ AllConstants = false;
+ break;
+ }
+ if (AllConstants)
+ return DFS.ZeroShadow;
+
+ Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
+ switch (Size) {
+ case 0:
+ return DFS.ZeroShadow;
+ case 1: {
+ LoadInst *LI = new LoadInst(ShadowAddr, "", Pos);
+ LI->setAlignment(ShadowAlign);
+ return LI;
+ }
+ case 2: {
+ IRBuilder<> IRB(Pos);
+ Value *ShadowAddr1 =
+ IRB.CreateGEP(ShadowAddr, ConstantInt::get(DFS.IntptrTy, 1));
+ return DFS.combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign),
+ IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign),
+ Pos);
+ }
+ }
+ if (Size % (64 / DFS.ShadowWidth) == 0) {
+ // Fast path for the common case where each byte has identical shadow: load
+ // shadow 64 bits at a time, fall out to a __dfsan_union_load call if any
+ // shadow is non-equal.
+ BasicBlock *FallbackBB = BasicBlock::Create(*DFS.Ctx, "", F);
+ IRBuilder<> FallbackIRB(FallbackBB);
+ CallInst *FallbackCall = FallbackIRB.CreateCall2(
+ DFS.DFSanUnionLoadFn, ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size));
+ FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+
+ // Compare each of the shadows stored in the loaded 64 bits to each other,
+ // by computing (WideShadow rotl ShadowWidth) == WideShadow.
+ IRBuilder<> IRB(Pos);
+ Value *WideAddr =
+ IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx));
+ Value *WideShadow = IRB.CreateAlignedLoad(WideAddr, ShadowAlign);
+ Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.ShadowTy);
+ Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidth);
+ Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidth);
+ Value *RotShadow = IRB.CreateOr(ShlShadow, ShrShadow);
+ Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow);
+
+ BasicBlock *Head = Pos->getParent();
+ BasicBlock *Tail = Head->splitBasicBlock(Pos);
+ // In the following code LastBr will refer to the previous basic block's
+ // conditional branch instruction, whose true successor is fixed up to point
+ // to the next block during the loop below or to the tail after the final
+ // iteration.
+ BranchInst *LastBr = BranchInst::Create(FallbackBB, FallbackBB, ShadowsEq);
+ ReplaceInstWithInst(Head->getTerminator(), LastBr);
+
+ for (uint64_t Ofs = 64 / DFS.ShadowWidth; Ofs != Size;
+ Ofs += 64 / DFS.ShadowWidth) {
+ BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F);
+ IRBuilder<> NextIRB(NextBB);
+ WideAddr = NextIRB.CreateGEP(WideAddr, ConstantInt::get(DFS.IntptrTy, 1));
+ Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign);
+ ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
+ LastBr->setSuccessor(0, NextBB);
+ LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB);
+ }
+
+ LastBr->setSuccessor(0, Tail);
+ FallbackIRB.CreateBr(Tail);
+ PHINode *Shadow = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
+ Shadow->addIncoming(FallbackCall, FallbackBB);
+ Shadow->addIncoming(TruncShadow, LastBr->getParent());
+ return Shadow;
+ }
+
+ IRBuilder<> IRB(Pos);
+ CallInst *FallbackCall = IRB.CreateCall2(
+ DFS.DFSanUnionLoadFn, ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size));
+ FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ return FallbackCall;
+}
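The rotate trick in the fast path is worth restating on its own (a sketch, assuming ShadowWidth == 16, so one 64-bit load covers four labels): the word equals itself rotated left by 16 bits exactly when all four 16-bit lanes hold the same label.

    #include <cstdint> // for the sketch only
    static bool allFourLabelsEqual(uint64_t WideShadow) {
      uint64_t Rot = (WideShadow << 16) | (WideShadow >> 48);
      return Rot == WideShadow;
    }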
+
+void DFSanVisitor::visitLoadInst(LoadInst &LI) {
+ uint64_t Size = DFSF.DFS.DL->getTypeStoreSize(LI.getType());
+ uint64_t Align;
+ if (ClPreserveAlignment) {
+ Align = LI.getAlignment();
+ if (Align == 0)
+ Align = DFSF.DFS.DL->getABITypeAlignment(LI.getType());
+ } else {
+ Align = 1;
+ }
+ IRBuilder<> IRB(&LI);
+ Value *LoadedShadow =
+ DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI);
+ Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
+ Value *CombinedShadow = DFSF.DFS.combineShadows(LoadedShadow, PtrShadow, &LI);
+ if (CombinedShadow != DFSF.DFS.ZeroShadow)
+ DFSF.NonZeroChecks.insert(CombinedShadow);
+
+ DFSF.setShadow(&LI, CombinedShadow);
+}
+
+void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
+ Value *Shadow, Instruction *Pos) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
+ llvm::DenseMap<AllocaInst *, AllocaInst *>::iterator i =
+ AllocaShadowMap.find(AI);
+ if (i != AllocaShadowMap.end()) {
+ IRBuilder<> IRB(Pos);
+ IRB.CreateStore(Shadow, i->second);
+ return;
+ }
+ }
+
+ uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
+ IRBuilder<> IRB(Pos);
+ Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
+ if (Shadow == DFS.ZeroShadow) {
+ IntegerType *ShadowTy = IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidth);
+ Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
+ Value *ExtShadowAddr =
+ IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
+ IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
+ return;
+ }
+
+ const unsigned ShadowVecSize = 128 / DFS.ShadowWidth;
+ uint64_t Offset = 0;
+ if (Size >= ShadowVecSize) {
+ VectorType *ShadowVecTy = VectorType::get(DFS.ShadowTy, ShadowVecSize);
+ Value *ShadowVec = UndefValue::get(ShadowVecTy);
+ for (unsigned i = 0; i != ShadowVecSize; ++i) {
+ ShadowVec = IRB.CreateInsertElement(
+ ShadowVec, Shadow, ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), i));
+ }
+ Value *ShadowVecAddr =
+ IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
+ do {
+ Value *CurShadowVecAddr = IRB.CreateConstGEP1_32(ShadowVecAddr, Offset);
+ IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
+ Size -= ShadowVecSize;
+ ++Offset;
+ } while (Size >= ShadowVecSize);
+ Offset *= ShadowVecSize;
+ }
+ while (Size > 0) {
+ Value *CurShadowAddr = IRB.CreateConstGEP1_32(ShadowAddr, Offset);
+ IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign);
+ --Size;
+ ++Offset;
+ }
+}
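With ShadowWidth == 16, ShadowVecSize above is 8, so the vectorized loop stores eight identical labels (128 bits) per iteration and the trailing scalar loop covers any remainder.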
+
+void DFSanVisitor::visitStoreInst(StoreInst &SI) {
+ uint64_t Size =
+ DFSF.DFS.DL->getTypeStoreSize(SI.getValueOperand()->getType());
+ uint64_t Align;
+ if (ClPreserveAlignment) {
+ Align = SI.getAlignment();
+ if (Align == 0)
+ Align = DFSF.DFS.DL->getABITypeAlignment(SI.getValueOperand()->getType());
+ } else {
+ Align = 1;
+ }
+ DFSF.storeShadow(SI.getPointerOperand(), Size, Align,
+ DFSF.getShadow(SI.getValueOperand()), &SI);
+}
+
+void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
+ visitOperandShadowInst(BO);
+}
+
+void DFSanVisitor::visitCastInst(CastInst &CI) { visitOperandShadowInst(CI); }
+
+void DFSanVisitor::visitCmpInst(CmpInst &CI) { visitOperandShadowInst(CI); }
+
+void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ visitOperandShadowInst(GEPI);
+}
+
+void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
+ bool AllLoadsStores = true;
+ for (Instruction::use_iterator i = I.use_begin(), e = I.use_end(); i != e;
+ ++i) {
+ if (isa<LoadInst>(*i))
+ continue;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(*i)) {
+ if (SI->getPointerOperand() == &I)
+ continue;
+ }
+
+ AllLoadsStores = false;
+ break;
+ }
+ if (AllLoadsStores) {
+ IRBuilder<> IRB(&I);
+ DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.ShadowTy);
+ }
+ DFSF.setShadow(&I, DFSF.DFS.ZeroShadow);
+}
+
+void DFSanVisitor::visitSelectInst(SelectInst &I) {
+ Value *CondShadow = DFSF.getShadow(I.getCondition());
+ Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
+ Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
+
+ if (isa<VectorType>(I.getCondition()->getType())) {
+ DFSF.setShadow(
+ &I, DFSF.DFS.combineShadows(
+ CondShadow,
+ DFSF.DFS.combineShadows(TrueShadow, FalseShadow, &I), &I));
+ } else {
+ Value *ShadowSel;
+ if (TrueShadow == FalseShadow) {
+ ShadowSel = TrueShadow;
+ } else {
+ ShadowSel =
+ SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
+ }
+ DFSF.setShadow(&I, DFSF.DFS.combineShadows(CondShadow, ShadowSel, &I));
+ }
+}
+
+void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *ValShadow = DFSF.getShadow(I.getValue());
+ IRB.CreateCall3(
+ DFSF.DFS.DFSanSetLabelFn, ValShadow,
+ IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)),
+ IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy));
+}
+
+void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
+ Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
+ Value *LenShadow = IRB.CreateMul(
+ I.getLength(),
+ ConstantInt::get(I.getLength()->getType(), DFSF.DFS.ShadowWidth / 8));
+ Value *AlignShadow;
+ if (ClPreserveAlignment) {
+ AlignShadow = IRB.CreateMul(I.getAlignmentCst(),
+ ConstantInt::get(I.getAlignmentCst()->getType(),
+ DFSF.DFS.ShadowWidth / 8));
+ } else {
+ AlignShadow = ConstantInt::get(I.getAlignmentCst()->getType(),
+ DFSF.DFS.ShadowWidth / 8);
+ }
+ Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
+ DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr);
+ SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
+ IRB.CreateCall5(I.getCalledValue(), DestShadow, SrcShadow, LenShadow,
+ AlignShadow, I.getVolatileCst());
+}
+
+void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
+ if (!DFSF.IsNativeABI && RI.getReturnValue()) {
+ switch (DFSF.IA) {
+ case DataFlowSanitizer::IA_TLS: {
+ Value *S = DFSF.getShadow(RI.getReturnValue());
+ IRBuilder<> IRB(&RI);
+ IRB.CreateStore(S, DFSF.getRetvalTLS());
+ break;
+ }
+ case DataFlowSanitizer::IA_Args: {
+ IRBuilder<> IRB(&RI);
+ Type *RT = DFSF.F->getFunctionType()->getReturnType();
+ Value *InsVal =
+ IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0);
+ Value *InsShadow =
+ IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1);
+ RI.setOperand(0, InsShadow);
+ break;
+ }
+ }
+ }
+}
+
+void DFSanVisitor::visitCallSite(CallSite CS) {
+ Function *F = CS.getCalledFunction();
+ if ((F && F->isIntrinsic()) || isa<InlineAsm>(CS.getCalledValue())) {
+ visitOperandShadowInst(*CS.getInstruction());
+ return;
+ }
+
+ IRBuilder<> IRB(CS.getInstruction());
+
+ DenseMap<Value *, Function *>::iterator i =
+ DFSF.DFS.UnwrappedFnMap.find(CS.getCalledValue());
+ if (i != DFSF.DFS.UnwrappedFnMap.end()) {
+ Function *F = i->second;
+ switch (DFSF.DFS.getWrapperKind(F)) {
+ case DataFlowSanitizer::WK_Warning: {
+ CS.setCalledFunction(F);
+ IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
+ IRB.CreateGlobalStringPtr(F->getName()));
+ DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow);
+ return;
+ }
+ case DataFlowSanitizer::WK_Discard: {
+ CS.setCalledFunction(F);
+ DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow);
+ return;
+ }
+ case DataFlowSanitizer::WK_Functional: {
+ CS.setCalledFunction(F);
+ visitOperandShadowInst(*CS.getInstruction());
+ return;
+ }
+ case DataFlowSanitizer::WK_Custom: {
+ // Don't try to handle invokes of custom functions, it's too complicated.
+ // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
+ // wrapper.
+ if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
+ FunctionType *FT = F->getFunctionType();
+ FunctionType *CustomFT = DFSF.DFS.getCustomFunctionType(FT);
+ std::string CustomFName = "__dfsw_";
+ CustomFName += F->getName();
+ Constant *CustomF =
+ DFSF.DFS.Mod->getOrInsertFunction(CustomFName, CustomFT);
+ if (Function *CustomFn = dyn_cast<Function>(CustomF)) {
+ CustomFn->copyAttributesFrom(F);
+
+ // Custom functions returning non-void will write to the return label.
+ if (!FT->getReturnType()->isVoidTy()) {
+ CustomFn->removeAttributes(AttributeSet::FunctionIndex,
+ DFSF.DFS.ReadOnlyNoneAttrs);
+ }
+ }
+
+ std::vector<Value *> Args;
+
+ CallSite::arg_iterator i = CS.arg_begin();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) {
+ Type *T = (*i)->getType();
+ FunctionType *ParamFT;
+ if (isa<PointerType>(T) &&
+ (ParamFT = dyn_cast<FunctionType>(
+ cast<PointerType>(T)->getElementType()))) {
+ std::string TName = "dfst";
+ TName += utostr(FT->getNumParams() - n);
+ TName += "$";
+ TName += F->getName();
+ Constant *T = DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName);
+ Args.push_back(T);
+ Args.push_back(
+ IRB.CreateBitCast(*i, Type::getInt8PtrTy(*DFSF.DFS.Ctx)));
+ } else {
+ Args.push_back(*i);
+ }
+ }
+
+ i = CS.arg_begin();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
+ Args.push_back(DFSF.getShadow(*i));
+
+ if (!FT->getReturnType()->isVoidTy()) {
+ if (!DFSF.LabelReturnAlloca) {
+ DFSF.LabelReturnAlloca =
+ new AllocaInst(DFSF.DFS.ShadowTy, "labelreturn",
+ DFSF.F->getEntryBlock().begin());
+ }
+ Args.push_back(DFSF.LabelReturnAlloca);
+ }
+
+ CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
+ CustomCI->setCallingConv(CI->getCallingConv());
+ CustomCI->setAttributes(CI->getAttributes());
+
+ if (!FT->getReturnType()->isVoidTy()) {
+ LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca);
+ DFSF.setShadow(CustomCI, LabelLoad);
+ }
+
+ CI->replaceAllUsesWith(CustomCI);
+ CI->eraseFromParent();
+ return;
+ }
+ break;
+ }
+ }
+ }
+
+ FunctionType *FT = cast<FunctionType>(
+ CS.getCalledValue()->getType()->getPointerElementType());
+ if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
+ for (unsigned i = 0, n = FT->getNumParams(); i != n; ++i) {
+ IRB.CreateStore(DFSF.getShadow(CS.getArgument(i)),
+ DFSF.getArgTLS(i, CS.getInstruction()));
+ }
+ }
+
+ Instruction *Next = 0;
+ if (!CS.getType()->isVoidTy()) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ if (II->getNormalDest()->getSinglePredecessor()) {
+ Next = II->getNormalDest()->begin();
+ } else {
+ BasicBlock *NewBB =
+ SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DFS);
+ Next = NewBB->begin();
+ }
+ } else {
+ Next = CS->getNextNode();
+ }
+
+ if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
+ IRBuilder<> NextIRB(Next);
+ LoadInst *LI = NextIRB.CreateLoad(DFSF.getRetvalTLS());
+ DFSF.SkipInsts.insert(LI);
+ DFSF.setShadow(CS.getInstruction(), LI);
+ DFSF.NonZeroChecks.insert(LI);
+ }
+ }
+
+ // Do all instrumentation for IA_Args down here to defer tampering with the
+ // CFG in a way that SplitEdge may be able to detect.
+ if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) {
+ FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT);
+ Value *Func =
+ IRB.CreateBitCast(CS.getCalledValue(), PointerType::getUnqual(NewFT));
+ std::vector<Value *> Args;
+
+ CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
+ Args.push_back(*i);
+
+ i = CS.arg_begin();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
+ Args.push_back(DFSF.getShadow(*i));
+
+ if (FT->isVarArg()) {
+ unsigned VarArgSize = CS.arg_size() - FT->getNumParams();
+ ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);
+ AllocaInst *VarArgShadow =
+ new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin());
+ Args.push_back(IRB.CreateConstGEP2_32(VarArgShadow, 0, 0));
+ for (unsigned n = 0; i != e; ++i, ++n) {
+ IRB.CreateStore(DFSF.getShadow(*i),
+ IRB.CreateConstGEP2_32(VarArgShadow, 0, n));
+ Args.push_back(*i);
+ }
+ }
+
+ CallSite NewCS;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ NewCS = IRB.CreateInvoke(Func, II->getNormalDest(), II->getUnwindDest(),
+ Args);
+ } else {
+ NewCS = IRB.CreateCall(Func, Args);
+ }
+ NewCS.setCallingConv(CS.getCallingConv());
+ NewCS.setAttributes(CS.getAttributes().removeAttributes(
+ *DFSF.DFS.Ctx, AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType(),
+ AttributeSet::ReturnIndex)));
+
+ if (Next) {
+ ExtractValueInst *ExVal =
+ ExtractValueInst::Create(NewCS.getInstruction(), 0, "", Next);
+ DFSF.SkipInsts.insert(ExVal);
+ ExtractValueInst *ExShadow =
+ ExtractValueInst::Create(NewCS.getInstruction(), 1, "", Next);
+ DFSF.SkipInsts.insert(ExShadow);
+ DFSF.setShadow(ExVal, ExShadow);
+ DFSF.NonZeroChecks.insert(ExShadow);
+
+ CS.getInstruction()->replaceAllUsesWith(ExVal);
+ }
+
+ CS.getInstruction()->eraseFromParent();
+ }
+}
+
+void DFSanVisitor::visitPHINode(PHINode &PN) {
+ PHINode *ShadowPN =
+ PHINode::Create(DFSF.DFS.ShadowTy, PN.getNumIncomingValues(), "", &PN);
+
+ // Give the shadow phi node valid predecessors to fool SplitEdge into working.
+ Value *UndefShadow = UndefValue::get(DFSF.DFS.ShadowTy);
+ for (PHINode::block_iterator i = PN.block_begin(), e = PN.block_end(); i != e;
+ ++i) {
+ ShadowPN->addIncoming(UndefShadow, *i);
+ }
+
+ DFSF.PHIFixups.push_back(std::make_pair(&PN, ShadowPN));
+ DFSF.setShadow(&PN, ShadowPN);
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/DebugIR.cpp b/contrib/llvm/lib/Transforms/Instrumentation/DebugIR.cpp
new file mode 100644
index 0000000..f50a044
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/DebugIR.cpp
@@ -0,0 +1,618 @@
+//===--- DebugIR.cpp - Transform debug metadata to allow debugging IR -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A Module transform pass that emits a succinct version of the IR and replaces
+// the source file metadata to allow debuggers to step through the IR.
+//
+// FIXME: instead of replacing debug metadata, this pass should allow for
+// additional metadata to be used to point capable debuggers to the IR file
+// without destroying the mapping to the original source file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "debug-ir"
+
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+
+#include "DebugIR.h"
+
+#include <string>
+
+#define STR_HELPER(x) #x
+#define STR(x) STR_HELPER(x)
+
+using namespace llvm;
+
+namespace {
+
+/// Builds a map of Value* to line numbers on which the Value appears in a
+/// textual representation of the IR, by plugging into the AssemblyWriter
+/// while masquerading as an AssemblyAnnotationWriter.
+class ValueToLineMap : public AssemblyAnnotationWriter {
+ ValueMap<const Value *, unsigned int> Lines;
+ typedef ValueMap<const Value *, unsigned int>::const_iterator LineIter;
+
+ void addEntry(const Value *V, formatted_raw_ostream &Out) {
+ Out.flush();
+ Lines.insert(std::make_pair(V, Out.getLine() + 1));
+ }
+
+public:
+
+ /// Prints Module to a null buffer in order to build the map of Value pointers
+ /// to line numbers.
+ ValueToLineMap(const Module *M) {
+ raw_null_ostream ThrowAway;
+ M->print(ThrowAway, this);
+ }
+
+ // This function is called after an Instruction, GlobalValue, or GlobalAlias
+ // is printed.
+ void printInfoComment(const Value &V, formatted_raw_ostream &Out) {
+ addEntry(&V, Out);
+ }
+
+ void emitFunctionAnnot(const Function *F, formatted_raw_ostream &Out) {
+ addEntry(F, Out);
+ }
+
+ /// If V appears on a line in the textual IR representation, sets Line to the
+ /// line number and returns true, otherwise returns false.
+ bool getLine(const Value *V, unsigned int &Line) const {
+ LineIter i = Lines.find(V);
+ if (i != Lines.end()) {
+ Line = i->second;
+ return true;
+ }
+ return false;
+ }
+};
+
+/// Removes debug intrinsics like llvm.dbg.declare and llvm.dbg.value.
+class DebugIntrinsicsRemover : public InstVisitor<DebugIntrinsicsRemover> {
+ void remove(Instruction &I) { I.eraseFromParent(); }
+
+public:
+ static void process(Module &M) {
+ DebugIntrinsicsRemover Remover;
+ Remover.visit(&M);
+ }
+ void visitDbgDeclareInst(DbgDeclareInst &I) { remove(I); }
+ void visitDbgValueInst(DbgValueInst &I) { remove(I); }
+ void visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { remove(I); }
+};
+
+/// Removes debug metadata (!dbg) nodes from all instructions, and optionally
+/// metadata named "llvm.dbg.cu" if RemoveNamedInfo is true.
+class DebugMetadataRemover : public InstVisitor<DebugMetadataRemover> {
+ bool RemoveNamedInfo;
+
+public:
+ static void process(Module &M, bool RemoveNamedInfo = true) {
+ DebugMetadataRemover Remover(RemoveNamedInfo);
+ Remover.run(&M);
+ }
+
+ DebugMetadataRemover(bool RemoveNamedInfo)
+ : RemoveNamedInfo(RemoveNamedInfo) {}
+
+ void visitInstruction(Instruction &I) {
+ if (I.getMetadata(LLVMContext::MD_dbg))
+ I.setMetadata(LLVMContext::MD_dbg, 0);
+ }
+
+ void run(Module *M) {
+ // Remove debug metadata attached to instructions
+ visit(M);
+
+ if (RemoveNamedInfo) {
+ // Remove CU named metadata (and all children nodes)
+ NamedMDNode *Node = M->getNamedMetadata("llvm.dbg.cu");
+ if (Node)
+ M->eraseNamedMetadata(Node);
+ }
+ }
+};
+
+/// Updates debug metadata in a Module:
+/// - changes Filename/Directory to values provided on construction
+/// - adds/updates line number (DebugLoc) entries associated with each
+/// instruction to reflect the instruction's location in an LLVM IR file
+class DIUpdater : public InstVisitor<DIUpdater> {
+ /// Builder of debug information
+ DIBuilder Builder;
+
+ /// Helper for type attributes/sizes/etc
+ DataLayout Layout;
+
+ /// Map of Value* to line numbers
+ const ValueToLineMap LineTable;
+
+ /// Map of Value* (in original Module) to Value* (in optional cloned Module)
+ const ValueToValueMapTy *VMap;
+
+ /// Index of the debug metadata already present in the Module
+ DebugInfoFinder Finder;
+
+ /// Source filename and directory
+ StringRef Filename;
+ StringRef Directory;
+
+ // CU nodes needed when creating DI subprograms
+ MDNode *FileNode;
+ MDNode *LexicalBlockFileNode;
+ const MDNode *CUNode;
+
+ ValueMap<const Function *, MDNode *> SubprogramDescriptors;
+ DenseMap<const Type *, MDNode *> TypeDescriptors;
+
+public:
+ DIUpdater(Module &M, StringRef Filename = StringRef(),
+ StringRef Directory = StringRef(), const Module *DisplayM = 0,
+ const ValueToValueMapTy *VMap = 0)
+ : Builder(M), Layout(&M), LineTable(DisplayM ? DisplayM : &M), VMap(VMap),
+ Finder(), Filename(Filename), Directory(Directory), FileNode(0),
+ LexicalBlockFileNode(0), CUNode(0) {
+ Finder.processModule(M);
+ visit(&M);
+ }
+
+ ~DIUpdater() { Builder.finalize(); }
+
+ void visitModule(Module &M) {
+ if (Finder.compile_unit_count() > 1)
+ report_fatal_error("DebugIR pass supports only a signle compile unit per "
+ "Module.");
+ createCompileUnit(
+ Finder.compile_unit_count() == 1 ? *Finder.compile_unit_begin() : 0);
+ }
+
+ void visitFunction(Function &F) {
+ if (F.isDeclaration() || findDISubprogram(&F))
+ return;
+
+ StringRef MangledName = F.getName();
+ DICompositeType Sig = createFunctionSignature(&F);
+
+ // find line of function declaration
+ unsigned Line = 0;
+ if (!findLine(&F, Line)) {
+ DEBUG(dbgs() << "WARNING: No line for Function " << F.getName().str()
+ << "\n");
+ return;
+ }
+
+ Instruction *FirstInst = F.begin()->begin();
+ unsigned ScopeLine = 0;
+ if (!findLine(FirstInst, ScopeLine)) {
+ DEBUG(dbgs() << "WARNING: No line for 1st Instruction in Function "
+ << F.getName().str() << "\n");
+ return;
+ }
+
+ bool Local = F.hasInternalLinkage();
+ bool IsDefinition = !F.isDeclaration();
+ bool IsOptimized = false;
+
+ int FuncFlags = llvm::DIDescriptor::FlagPrototyped;
+ assert(CUNode && FileNode);
+ DISubprogram Sub = Builder.createFunction(
+ DICompileUnit(CUNode), F.getName(), MangledName, DIFile(FileNode), Line,
+ Sig, Local, IsDefinition, ScopeLine, FuncFlags, IsOptimized, &F);
+ assert(Sub.isSubprogram());
+ DEBUG(dbgs() << "create subprogram mdnode " << *Sub << ": "
+ << "\n");
+
+ SubprogramDescriptors.insert(std::make_pair(&F, Sub));
+ }
+
+ void visitInstruction(Instruction &I) {
+ DebugLoc Loc(I.getDebugLoc());
+
+ /// If a ValueToValueMap is provided, use it to look up the real instruction,
+ /// since the line table was generated from a clone of the module on which we
+ /// are operating.
+ Value *RealInst = 0;
+ if (VMap)
+ RealInst = VMap->lookup(&I);
+
+ if (!RealInst)
+ RealInst = &I;
+
+ unsigned Col = 0; // FIXME: support columns
+ unsigned Line;
+ if (!LineTable.getLine(RealInst, Line)) {
+ // The instruction has no line; it may have been removed (in the module
+ // that will be passed to the debugger), so there is nothing to do here.
+ DEBUG(dbgs() << "WARNING: no LineTable entry for instruction " << RealInst
+ << "\n");
+ DEBUG(RealInst->dump());
+ return;
+ }
+
+ DebugLoc NewLoc;
+ if (!Loc.isUnknown())
+ // I had a previous debug location: re-use the DebugLoc
+ NewLoc = DebugLoc::get(Line, Col, Loc.getScope(RealInst->getContext()),
+ Loc.getInlinedAt(RealInst->getContext()));
+ else if (MDNode *scope = findScope(&I))
+ NewLoc = DebugLoc::get(Line, Col, scope, 0);
+ else {
+ DEBUG(dbgs() << "WARNING: no valid scope for instruction " << &I
+ << ". no DebugLoc will be present."
+ << "\n");
+ return;
+ }
+
+ addDebugLocation(I, NewLoc);
+ }
+
+private:
+
+ void createCompileUnit(MDNode *CUToReplace) {
+ std::string Flags;
+ bool IsOptimized = false;
+ StringRef Producer;
+ unsigned RuntimeVersion(0);
+ StringRef SplitName;
+
+ if (CUToReplace) {
+ // save fields from existing CU to re-use in the new CU
+ DICompileUnit ExistingCU(CUToReplace);
+ Producer = ExistingCU.getProducer();
+ IsOptimized = ExistingCU.isOptimized();
+ Flags = ExistingCU.getFlags();
+ RuntimeVersion = ExistingCU.getRunTimeVersion();
+ SplitName = ExistingCU.getSplitDebugFilename();
+ } else {
+ Producer =
+ "LLVM Version " STR(LLVM_VERSION_MAJOR) "." STR(LLVM_VERSION_MINOR);
+ }
+
+ CUNode =
+ Builder.createCompileUnit(dwarf::DW_LANG_C99, Filename, Directory,
+ Producer, IsOptimized, Flags, RuntimeVersion);
+
+ if (CUToReplace)
+ CUToReplace->replaceAllUsesWith(const_cast<MDNode *>(CUNode));
+
+ DICompileUnit CU(CUNode);
+ FileNode = Builder.createFile(Filename, Directory);
+ LexicalBlockFileNode = Builder.createLexicalBlockFile(CU, DIFile(FileNode));
+ }
+
+ /// Returns the MDNode* that represents the DI scope to associate with I
+ MDNode *findScope(const Instruction *I) {
+ const Function *F = I->getParent()->getParent();
+ if (MDNode *ret = findDISubprogram(F))
+ return ret;
+
+ DEBUG(dbgs() << "WARNING: Using fallback lexical block file scope "
+ << LexicalBlockFileNode << " as scope for instruction " << I
+ << "\n");
+ return LexicalBlockFileNode;
+ }
+
+ /// Returns the MDNode* that is the descriptor for F
+ MDNode *findDISubprogram(const Function *F) {
+ typedef ValueMap<const Function *, MDNode *>::const_iterator FuncNodeIter;
+ FuncNodeIter i = SubprogramDescriptors.find(F);
+ if (i != SubprogramDescriptors.end())
+ return i->second;
+
+ DEBUG(dbgs() << "searching for DI scope node for Function " << F
+ << " in a list of " << Finder.subprogram_count()
+ << " subprogram nodes"
+ << "\n");
+
+ for (DebugInfoFinder::iterator i = Finder.subprogram_begin(),
+ e = Finder.subprogram_end();
+ i != e; ++i) {
+ DISubprogram S(*i);
+ if (S.getFunction() == F) {
+ DEBUG(dbgs() << "Found DISubprogram " << *i << " for function "
+ << S.getFunction() << "\n");
+ return *i;
+ }
+ }
+ DEBUG(dbgs() << "unable to find DISubprogram node for function "
+ << F->getName().str() << "\n");
+ return 0;
+ }
+
+ /// Sets Line to the line number on which V appears and returns true. If a
+ /// line location for V is not found, returns false.
+ bool findLine(const Value *V, unsigned &Line) {
+ if (LineTable.getLine(V, Line))
+ return true;
+
+ if (VMap) {
+ Value *mapped = VMap->lookup(V);
+ if (mapped && LineTable.getLine(mapped, Line))
+ return true;
+ }
+ return false;
+ }
+
+ std::string getTypeName(Type *T) {
+ std::string TypeName;
+ raw_string_ostream TypeStream(TypeName);
+ T->print(TypeStream);
+ TypeStream.flush();
+ return TypeName;
+ }
+
+ /// Returns the MDNode that represents type T if it is already created, or 0
+ /// if it is not.
+ MDNode *getType(const Type *T) {
+ typedef DenseMap<const Type *, MDNode *>::const_iterator TypeNodeIter;
+ TypeNodeIter i = TypeDescriptors.find(T);
+ if (i != TypeDescriptors.end())
+ return i->second;
+ return 0;
+ }
+
+ /// Returns a DebugInfo type from an LLVM type T.
+ DIDerivedType getOrCreateType(Type *T) {
+ MDNode *N = getType(T);
+ if (N)
+ return DIDerivedType(N);
+ else if (T->isVoidTy())
+ return DIDerivedType(0);
+ else if (T->isStructTy()) {
+ N = Builder.createStructType(
+ DIScope(LexicalBlockFileNode), T->getStructName(), DIFile(FileNode),
+ 0, Layout.getTypeSizeInBits(T), Layout.getABITypeAlignment(T), 0,
+ DIType(0), DIArray(0)); // filled in later
+
+ // N is added to the map early so that the element search below can find it,
+ // avoiding infinite recursion for structs that contain pointers to their
+ // own type.
+ TypeDescriptors[T] = N;
+ DICompositeType StructDescriptor(N);
+
+ SmallVector<Value *, 4> Elements;
+ for (unsigned i = 0; i < T->getStructNumElements(); ++i)
+ Elements.push_back(getOrCreateType(T->getStructElementType(i)));
+
+ // set struct elements
+ StructDescriptor.setTypeArray(Builder.getOrCreateArray(Elements));
+ } else if (T->isPointerTy()) {
+ Type *PointeeTy = T->getPointerElementType();
+ if (!(N = getType(PointeeTy)))
+ N = Builder.createPointerType(
+ getOrCreateType(PointeeTy), Layout.getPointerTypeSizeInBits(T),
+ Layout.getPrefTypeAlignment(T), getTypeName(T));
+ } else if (T->isArrayTy()) {
+ SmallVector<Value *, 1> Subrange;
+ Subrange.push_back(
+ Builder.getOrCreateSubrange(0, T->getArrayNumElements() - 1));
+
+ N = Builder.createArrayType(Layout.getTypeSizeInBits(T),
+ Layout.getPrefTypeAlignment(T),
+ getOrCreateType(T->getArrayElementType()),
+ Builder.getOrCreateArray(Subrange));
+ } else {
+ int encoding = llvm::dwarf::DW_ATE_signed;
+ if (T->isIntegerTy())
+ encoding = llvm::dwarf::DW_ATE_unsigned;
+ else if (T->isFloatingPointTy())
+ encoding = llvm::dwarf::DW_ATE_float;
+
+ N = Builder.createBasicType(getTypeName(T), T->getPrimitiveSizeInBits(),
+ 0, encoding);
+ }
+ TypeDescriptors[T] = N;
+ return DIDerivedType(N);
+ }
+
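+ // Illustrative examples (not part of this change) of the mapping above:
+ //   i32       -> DW_ATE_unsigned basic type (all integers map to unsigned)
+ //   double    -> DW_ATE_float basic type
+ //   i8*       -> pointer type wrapping the descriptor for i8
+ //   [4 x i32] -> array type over a subrange built from (0, 3)
+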
+ /// Returns a DebugInfo type that represents a function signature for Func.
+ DICompositeType createFunctionSignature(const Function *Func) {
+ SmallVector<Value *, 4> Params;
+ DIDerivedType ReturnType(getOrCreateType(Func->getReturnType()));
+ Params.push_back(ReturnType);
+
+ const Function::ArgumentListType &Args(Func->getArgumentList());
+ for (Function::ArgumentListType::const_iterator i = Args.begin(),
+ e = Args.end();
+ i != e; ++i) {
+ Type *T(i->getType());
+ Params.push_back(getOrCreateType(T));
+ }
+
+ DIArray ParamArray = Builder.getOrCreateArray(Params);
+ return Builder.createSubroutineType(DIFile(FileNode), ParamArray);
+ }
+
+ /// Associates Instruction I with debug location Loc.
+ void addDebugLocation(Instruction &I, DebugLoc Loc) {
+ MDNode *MD = Loc.getAsMDNode(I.getContext());
+ I.setMetadata(LLVMContext::MD_dbg, MD);
+ }
+};
+
+/// Sets Filename/Directory from the Module identifier and returns true, or
+/// false if source information is not present.
+bool getSourceInfoFromModule(const Module &M, std::string &Directory,
+ std::string &Filename) {
+ std::string PathStr(M.getModuleIdentifier());
+ if (PathStr.length() == 0 || PathStr == "<stdin>")
+ return false;
+
+ Filename = sys::path::filename(PathStr);
+ SmallVector<char, 16> Path(PathStr.begin(), PathStr.end());
+ sys::path::remove_filename(Path);
+ Directory = StringRef(Path.data(), Path.size());
+ return true;
+}
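+
+// For example (illustrative): a module identifier of "/tmp/foo.ll" splits into
+// Filename "foo.ll" and Directory "/tmp".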
+
+// Sets Filename/Directory from debug information in M and returns true, or
+// false if no debug information is available or it cannot be parsed.
+bool getSourceInfoFromDI(const Module &M, std::string &Directory,
+ std::string &Filename) {
+ NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu");
+ if (!CUNode || CUNode->getNumOperands() == 0)
+ return false;
+
+ DICompileUnit CU(CUNode->getOperand(0));
+ if (!CU.Verify())
+ return false;
+
+ Filename = CU.getFilename();
+ Directory = CU.getDirectory();
+ return true;
+}
+
+} // anonymous namespace
+
+namespace llvm {
+
+bool DebugIR::getSourceInfo(const Module &M) {
+ ParsedPath = getSourceInfoFromDI(M, Directory, Filename) ||
+ getSourceInfoFromModule(M, Directory, Filename);
+ return ParsedPath;
+}
+
+bool DebugIR::updateExtension(StringRef NewExtension) {
+ size_t dot = Filename.find_last_of(".");
+ if (dot == std::string::npos)
+ return false;
+
+ Filename.erase(dot);
+ Filename += NewExtension.str();
+ return true;
+}
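+
+// For example (illustrative): with Filename "m.ll",
+// updateExtension(".debug-ll") rewrites it to "m.debug-ll".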
+
+void DebugIR::generateFilename(OwningPtr<int> &fd) {
+ SmallVector<char, 16> PathVec;
+ fd.reset(new int);
+ sys::fs::createTemporaryFile("debug-ir", "ll", *fd, PathVec);
+ StringRef Path(PathVec.data(), PathVec.size());
+ Filename = sys::path::filename(Path);
+ sys::path::remove_filename(PathVec);
+ Directory = StringRef(PathVec.data(), PathVec.size());
+
+ GeneratedPath = true;
+}
+
+std::string DebugIR::getPath() {
+ SmallVector<char, 16> Path;
+ sys::path::append(Path, Directory, Filename);
+ Path.resize(Filename.size() + Directory.size() + 2);
+ Path[Filename.size() + Directory.size() + 1] = '\0';
+ return std::string(Path.data());
+}
+
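+// Note: despite its name, this writes the textual IR form via Module::print;
+// that listing is what the debugger will present as source.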
+void DebugIR::writeDebugBitcode(const Module *M, int *fd) {
+ OwningPtr<raw_fd_ostream> Out;
+ std::string error;
+
+ if (!fd) {
+ std::string Path = getPath();
+ Out.reset(new raw_fd_ostream(Path.c_str(), error));
+ DEBUG(dbgs() << "WRITING debug bitcode from Module " << M << " to file "
+ << Path << "\n");
+ } else {
+ DEBUG(dbgs() << "WRITING debug bitcode from Module " << M << " to fd "
+ << *fd << "\n");
+ Out.reset(new raw_fd_ostream(*fd, true));
+ }
+
+ M->print(*Out, 0);
+ Out->close();
+}
+
+void DebugIR::createDebugInfo(Module &M, OwningPtr<Module> &DisplayM) {
+ if (M.getFunctionList().size() == 0)
+ // no functions -- no debug info needed
+ return;
+
+ OwningPtr<ValueToValueMapTy> VMap;
+
+ if (WriteSourceToDisk && (HideDebugIntrinsics || HideDebugMetadata)) {
+ VMap.reset(new ValueToValueMapTy);
+ DisplayM.reset(CloneModule(&M, *VMap));
+
+ if (HideDebugIntrinsics)
+ DebugIntrinsicsRemover::process(*DisplayM);
+
+ if (HideDebugMetadata)
+ DebugMetadataRemover::process(*DisplayM);
+ }
+
+ DIUpdater R(M, Filename, Directory, DisplayM.get(), VMap.get());
+}
+
+bool DebugIR::isMissingPath() { return Filename.empty() || Directory.empty(); }
+
+bool DebugIR::runOnModule(Module &M) {
+ OwningPtr<int> fd;
+
+ if (isMissingPath() && !getSourceInfo(M)) {
+ if (!WriteSourceToDisk)
+ report_fatal_error("DebugIR unable to determine file name in input. "
+ "Ensure Module contains an identifier, a valid "
+ "DICompileUnit, or construct DebugIR with "
+ "non-empty Filename/Directory parameters.");
+ else
+ generateFilename(fd);
+ }
+
+ if (!GeneratedPath && WriteSourceToDisk)
+ updateExtension(".debug-ll");
+
+ // Clear line numbers. Keep debug info (if any) if we were able to read the
+ // file name from the DICompileUnit descriptor.
+ DebugMetadataRemover::process(M, !ParsedPath);
+
+ OwningPtr<Module> DisplayM;
+ createDebugInfo(M, DisplayM);
+ if (WriteSourceToDisk) {
+ Module *OutputM = DisplayM.get() ? DisplayM.get() : &M;
+ writeDebugBitcode(OutputM, fd.get());
+ }
+
+ DEBUG(M.dump());
+ return true;
+}
+
+bool DebugIR::runOnModule(Module &M, std::string &Path) {
+ bool result = runOnModule(M);
+ Path = getPath();
+ return result;
+}
+
+} // llvm namespace
+
+char DebugIR::ID = 0;
+INITIALIZE_PASS(DebugIR, "debug-ir", "Enable debugging IR", false, false)
+
+ModulePass *llvm::createDebugIRPass(bool HideDebugIntrinsics,
+ bool HideDebugMetadata, StringRef Directory,
+ StringRef Filename) {
+ return new DebugIR(HideDebugIntrinsics, HideDebugMetadata, Directory,
+ Filename);
+}
+
+ModulePass *llvm::createDebugIRPass() { return new DebugIR(); }
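+
+// Illustrative usage sketch (not part of this change), scheduling the pass
+// through the legacy pass manager:
+//   PassManager PM;
+//   PM.add(llvm::createDebugIRPass());
+//   PM.run(M);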
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/DebugIR.h b/contrib/llvm/lib/Transforms/Instrumentation/DebugIR.h
new file mode 100644
index 0000000..13774cf
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/DebugIR.h
@@ -0,0 +1,99 @@
+//===- llvm/Transforms/Instrumentation/DebugIR.h - Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface of the DebugIR pass. For most users,
+// including Instrumentation.h and calling createDebugIRPass() is sufficient and
+// there is no need to include this file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_DEBUGIR_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_DEBUGIR_H
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+class DebugIR : public llvm::ModulePass {
+ /// If true, write a source file to disk.
+ bool WriteSourceToDisk;
+
+ /// Hide certain (non-essential) debug information (only relevant if
+ /// WriteSourceToDisk is true).
+ bool HideDebugIntrinsics;
+ bool HideDebugMetadata;
+
+ /// The location of the source file.
+ std::string Directory;
+ std::string Filename;
+
+ /// True if a temporary file name was generated.
+ bool GeneratedPath;
+
+ /// True if the file name was read from the Module.
+ bool ParsedPath;
+
+public:
+ static char ID;
+
+ const char *getPassName() const { return "DebugIR"; }
+
+ /// Generate a file on disk to be displayed in a debugger. If Filename and
+ /// Directory are empty, a temporary path will be generated.
+ DebugIR(bool HideDebugIntrinsics, bool HideDebugMetadata,
+ llvm::StringRef Directory, llvm::StringRef Filename)
+ : ModulePass(ID), WriteSourceToDisk(true),
+ HideDebugIntrinsics(HideDebugIntrinsics),
+ HideDebugMetadata(HideDebugMetadata), Directory(Directory),
+ Filename(Filename), GeneratedPath(false), ParsedPath(false) {}
+
+ /// Modify input in-place; do not generate additional files, and do not hide
+ /// any debug intrinsics/metadata that might be present.
+ DebugIR()
+ : ModulePass(ID), WriteSourceToDisk(false), HideDebugIntrinsics(false),
+ HideDebugMetadata(false), GeneratedPath(false), ParsedPath(false) {}
+
+ /// Run pass on M and set Path to the source file path in the output module.
+ bool runOnModule(llvm::Module &M, std::string &Path);
+ bool runOnModule(llvm::Module &M);
+
+private:
+
+ /// Returns the concatenated Directory + Filename, without error checking
+ std::string getPath();
+
+ /// Attempts to read source information from debug information in M, and if
+ /// that fails, from M's identifier. Returns true on success, false otherwise.
+ bool getSourceInfo(const llvm::Module &M);
+
+ /// Replace the extension of Filename with NewExtension, and return true if
+ /// successful. Return false if no extension could be found or Filename is
+ /// empty.
+ bool updateExtension(llvm::StringRef NewExtension);
+
+ /// Generate a temporary filename and open an fd
+ void generateFilename(llvm::OwningPtr<int> &fd);
+
+ /// Creates DWARF CU/Subroutine metadata
+ void createDebugInfo(llvm::Module &M,
+ llvm::OwningPtr<llvm::Module> &DisplayM);
+
+ /// Returns true if either Directory or Filename is missing, false otherwise.
+ bool isMissingPath();
+
+ /// Write M to disk, optionally passing in an fd to an open file which is
+ /// closed by this function after writing. If no fd is specified, a new file
+ /// is opened, written, and closed.
+ void writeDebugBitcode(const llvm::Module *M, int *fd = 0);
+};
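+
+// Illustrative sketch (not part of this change): constructing the pass with an
+// explicit output location and retrieving the path it used, given a Module M:
+//   DebugIR D(/*HideDebugIntrinsics=*/true, /*HideDebugMetadata=*/true,
+//             "/tmp", "m.ll");
+//   std::string Path;
+//   D.runOnModule(M, Path); // Path comes back as roughly "/tmp/m.debug-ll"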
+
+} // llvm namespace
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_DEBUGIR_H
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
deleted file mode 100644
index a2459fb..0000000
--- a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-//===- EdgeProfiling.cpp - Insert counters for edge profiling -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass instruments the specified program with counters for edge profiling.
-// Edge profiling can give a reasonable approximation of the hot paths through a
-// program, and is used for a wide variety of program transformations.
-//
-// Note that this implementation is very naive. We insert a counter for *every*
-// edge in the program, instead of using control flow information to prune the
-// number of counters inserted.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "insert-edge-profiling"
-
-#include "llvm/Transforms/Instrumentation.h"
-#include "ProfilingUtils.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <set>
-using namespace llvm;
-
-STATISTIC(NumEdgesInserted, "The # of edges inserted.");
-
-namespace {
- class EdgeProfiler : public ModulePass {
- bool runOnModule(Module &M);
- public:
- static char ID; // Pass identification, replacement for typeid
- EdgeProfiler() : ModulePass(ID) {
- initializeEdgeProfilerPass(*PassRegistry::getPassRegistry());
- }
-
- virtual const char *getPassName() const {
- return "Edge Profiler";
- }
- };
-}
-
-char EdgeProfiler::ID = 0;
-INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling",
- "Insert instrumentation for edge profiling", false, false)
-
-ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
-
-bool EdgeProfiler::runOnModule(Module &M) {
- Function *Main = M.getFunction("main");
- if (Main == 0) {
- errs() << "WARNING: cannot insert edge profiling into a module"
- << " with no main function!\n";
- return false; // No main, no instrumentation!
- }
-
- std::set<BasicBlock*> BlocksToInstrument;
- unsigned NumEdges = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- // Reserve space for (0,entry) edge.
- ++NumEdges;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- // Keep track of which blocks need to be instrumented. We don't want to
- // instrument blocks that are added as the result of breaking critical
- // edges!
- BlocksToInstrument.insert(BB);
- NumEdges += BB->getTerminator()->getNumSuccessors();
- }
- }
-
- Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges);
- GlobalVariable *Counters =
- new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(ATy), "EdgeProfCounters");
- NumEdgesInserted = NumEdges;
-
- // Instrument all of the edges...
- unsigned i = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- // Create counter for (0,entry) edge.
- IncrementCounterInBlock(&F->getEntryBlock(), i++, Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- if (BlocksToInstrument.count(BB)) { // Don't instrument inserted blocks
- // Okay, we have to add a counter of each outgoing edge. If the
- // outgoing edge is not critical don't split it, just insert the counter
- // in the source or destination of the edge.
- TerminatorInst *TI = BB->getTerminator();
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- // If the edge is critical, split it.
- SplitCriticalEdge(TI, s, this);
-
- // Okay, we are guaranteed that the edge is no longer critical. If we
- // only have a single successor, insert the counter in this block,
- // otherwise insert it in the successor block.
- if (TI->getNumSuccessors() == 1) {
- // Insert counter at the start of the block
- IncrementCounterInBlock(BB, i++, Counters, false);
- } else {
- // Insert counter at the start of the block
- IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters);
- }
- }
- }
- }
-
- // Add the initialization call to main.
- InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters);
- return true;
-}
-
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 2edd151..206bffb 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -17,7 +17,6 @@
#define DEBUG_TYPE "insert-gcov-profiling"
#include "llvm/Transforms/Instrumentation.h"
-#include "ProfilingUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
@@ -34,9 +33,10 @@
#include "llvm/Support/DebugLoc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/PathV2.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <algorithm>
#include <string>
#include <utility>
using namespace llvm;
@@ -102,6 +102,7 @@ namespace {
Constant *getIncrementIndirectCounterFunc();
Constant *getEmitFunctionFunc();
Constant *getEmitArcsFunc();
+ Constant *getSummaryInfoFunc();
Constant *getDeleteWriteoutFunctionListFunc();
Constant *getDeleteFlushFunctionListFunc();
Constant *getEndFileFunc();
@@ -153,10 +154,10 @@ static std::string getFunctionName(DISubprogram SP) {
namespace {
class GCOVRecord {
protected:
- static const char *LinesTag;
- static const char *FunctionTag;
- static const char *BlockTag;
- static const char *EdgeTag;
+ static const char *const LinesTag;
+ static const char *const FunctionTag;
+ static const char *const BlockTag;
+ static const char *const EdgeTag;
GCOVRecord() {}
@@ -170,7 +171,7 @@ namespace {
// Returns the length measured in 4-byte blocks that will be used to
// represent this string in a GCOV file
- unsigned lengthOfGCOVString(StringRef s) {
+ static unsigned lengthOfGCOVString(StringRef s) {
// A GCOV string is a length, followed by a NUL, then between 0 and 3 NULs
// padding out to the next 4-byte word. The length is measured in 4-byte
// words including padding, not bytes of actual string.
@@ -190,10 +191,10 @@ namespace {
raw_ostream *os;
};
- const char *GCOVRecord::LinesTag = "\0\0\x45\x01";
- const char *GCOVRecord::FunctionTag = "\0\0\0\1";
- const char *GCOVRecord::BlockTag = "\0\0\x41\x01";
- const char *GCOVRecord::EdgeTag = "\0\0\x43\x01";
+ const char *const GCOVRecord::LinesTag = "\0\0\x45\x01";
+ const char *const GCOVRecord::FunctionTag = "\0\0\0\1";
+ const char *const GCOVRecord::BlockTag = "\0\0\x41\x01";
+ const char *const GCOVRecord::EdgeTag = "\0\0\x43\x01";
class GCOVFunction;
class GCOVBlock;
@@ -207,7 +208,7 @@ namespace {
Lines.push_back(Line);
}
- uint32_t length() {
+ uint32_t length() const {
// Here 2 = 1 for string length + 1 for '0' id#.
return lengthOfGCOVString(Filename) + 2 + Lines.size();
}
@@ -229,6 +230,15 @@ namespace {
SmallVector<uint32_t, 32> Lines;
};
+
+ // Sorting function for deterministic behaviour in GCOVBlock::writeOut.
+ struct StringKeySort {
+ bool operator()(StringMapEntry<GCOVLines *> *LHS,
+ StringMapEntry<GCOVLines *> *RHS) const {
+ return LHS->getKey() < RHS->getKey();
+ }
+ };
+
// Represent a basic block in GCOV. Each block has a unique number in the
// function, number of lines belonging to each block, and a set of edges to
// other blocks.
@@ -248,17 +258,23 @@ namespace {
void writeOut() {
uint32_t Len = 3;
+ SmallVector<StringMapEntry<GCOVLines *> *, 32> SortedLinesByFile;
for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(),
E = LinesByFile.end(); I != E; ++I) {
Len += I->second->length();
+ SortedLinesByFile.push_back(&*I);
}
writeBytes(LinesTag, 4);
write(Len);
write(Number);
- for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(),
- E = LinesByFile.end(); I != E; ++I)
- I->second->writeOut();
+
+ StringKeySort Sorter;
+ std::sort(SortedLinesByFile.begin(), SortedLinesByFile.end(), Sorter);
+ for (SmallVectorImpl<StringMapEntry<GCOVLines *> *>::iterator
+ I = SortedLinesByFile.begin(), E = SortedLinesByFile.end();
+ I != E; ++I)
+ (*I)->getValue()->writeOut();
write(0);
write(0);
}
@@ -335,9 +351,10 @@ namespace {
DEBUG(dbgs() << Blocks.size() << " blocks.\n");
// Emit edges between blocks.
- for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
- E = Blocks.end(); I != E; ++I) {
- GCOVBlock &Block = *I->second;
+ if (Blocks.empty()) return;
+ Function *F = Blocks.begin()->first->getParent();
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ GCOVBlock &Block = *Blocks[I];
if (Block.OutEdges.empty()) continue;
writeBytes(EdgeTag, 4);
@@ -352,9 +369,8 @@ namespace {
}
// Emit lines for each block.
- for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
- E = Blocks.end(); I != E; ++I) {
- I->second->writeOut();
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ Blocks[I]->writeOut();
}
}
@@ -410,7 +426,7 @@ void GCOVProfiler::emitProfileNotes() {
DICompileUnit CU(CU_Nodes->getOperand(i));
std::string ErrorInfo;
raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary);
+ sys::fs::F_Binary);
out.write("oncg", 4);
out.write(ReversedVersion, 4);
out.write("MVLL", 4);
@@ -418,7 +434,10 @@ void GCOVProfiler::emitProfileNotes() {
DIArray SPs = CU.getSubprograms();
for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
DISubprogram SP(SPs.getElement(i));
- if (!SP.Verify()) continue;
+ assert((!SP || SP.isSubprogram()) &&
+ "A MDNode in subprograms of a CU should be null or a DISubprogram.");
+ if (!SP)
+ continue;
Function *F = SP.getFunction();
if (!F) continue;
@@ -467,7 +486,10 @@ bool GCOVProfiler::emitProfileArcs() {
SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
DISubprogram SP(SPs.getElement(i));
- if (!SP.Verify()) continue;
+ assert((!SP || SP.isSubprogram()) &&
+ "A MDNode in subprograms of a CU should be null or a DISubprogram.");
+ if (!SP)
+ continue;
Function *F = SP.getFunction();
if (!F) continue;
if (!Result) Result = true;
@@ -497,15 +519,15 @@ bool GCOVProfiler::emitProfileArcs() {
TerminatorInst *TI = BB->getTerminator();
int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
if (Successors) {
- IRBuilder<> Builder(TI);
-
if (Successors == 1) {
+ IRBuilder<> Builder(BB->getFirstInsertionPt());
Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Edge);
Value *Count = Builder.CreateLoad(Counter);
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ IRBuilder<> Builder(BI);
Value *Sel = Builder.CreateSelect(BI->getCondition(),
Builder.getInt64(Edge),
Builder.getInt64(Edge + 1));
@@ -521,6 +543,7 @@ bool GCOVProfiler::emitProfileArcs() {
for (int i = 0; i != Successors; ++i)
ComplexEdgeSuccs.insert(TI->getSuccessor(i));
}
+
Edge += Successors;
}
}
@@ -532,14 +555,13 @@ bool GCOVProfiler::emitProfileArcs() {
GlobalVariable *EdgeState = getEdgeStateValue();
for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
- IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
+ IRBuilder<> Builder(ComplexEdgePreds[i + 1]->getFirstInsertionPt());
Builder.CreateStore(Builder.getInt32(i), EdgeState);
}
+
for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
- // call runtime to perform increment
- BasicBlock::iterator InsertPt =
- ComplexEdgeSuccs[i+1]->getFirstInsertionPt();
- IRBuilder<> Builder(InsertPt);
+ // Call runtime to perform increment.
+ IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstInsertionPt());
Value *CounterPtrArray =
Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
i * ComplexEdgePreds.size());
@@ -577,7 +599,7 @@ bool GCOVProfiler::emitProfileArcs() {
};
FTy = FunctionType::get(Builder.getVoidTy(), Params, false);
- // Inialize the environment and register the local writeout and flush
+ // Initialize the environment and register the local writeout and flush
// functions.
Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
Builder.CreateCall2(GCOVInit, WriteoutF, FlushF);
@@ -679,6 +701,11 @@ Constant *GCOVProfiler::getEmitArcsFunc() {
return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
}
+Constant *GCOVProfiler::getSummaryInfoFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
+}
+
Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy);
@@ -725,6 +752,7 @@ Function *GCOVProfiler::insertCounterWriteout(
Constant *StartFile = getStartFileFunc();
Constant *EmitFunction = getEmitFunctionFunc();
Constant *EmitArcs = getEmitArcsFunc();
+ Constant *SummaryInfo = getSummaryInfoFunc();
Constant *EndFile = getEndFileFunc();
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
@@ -751,6 +779,7 @@ Function *GCOVProfiler::insertCounterWriteout(
Builder.getInt32(Arcs),
Builder.CreateConstGEP2_64(GV, 0, 0));
}
+ Builder.CreateCall(SummaryInfo);
Builder.CreateCall(EndFile);
}
}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index 9f35396..b1bea38 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -24,12 +24,10 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeAddressSanitizerPass(Registry);
initializeAddressSanitizerModulePass(Registry);
initializeBoundsCheckingPass(Registry);
- initializeEdgeProfilerPass(Registry);
initializeGCOVProfilerPass(Registry);
- initializeOptimalEdgeProfilerPass(Registry);
- initializePathProfilerPass(Registry);
initializeMemorySanitizerPass(Registry);
initializeThreadSanitizerPass(Registry);
+ initializeDataFlowSanitizerPass(Registry);
}
/// LLVMInitializeInstrumentation - C binding for
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4e75904..d547adc 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -66,6 +66,31 @@
/// avoids storing origin to memory when a fully initialized value is stored.
/// This way it avoids needlessly overwriting the origin of the 4-byte region on
/// a short (i.e. 1 byte) clean store, and it is also good for performance.
+///
+/// Atomic handling.
+///
+/// Ideally, every atomic store of application value should update the
+/// corresponding shadow location in an atomic way. Unfortunately, atomic store
+/// to two disjoint locations cannot be done without severe slowdown.
+///
+/// Therefore, we implement an approximation that may err on the safe side.
+/// In this implementation, every atomically accessed location in the program
+/// may only change from (partially) uninitialized to fully initialized, but
+/// not the other way around. We load the shadow _after_ the application load,
+/// and we store the shadow _before_ the app store. Also, we always store clean
+/// shadow (if the application store is atomic). This way, if the store-load
+/// pair constitutes a happens-before arc, shadow store and load are correctly
+/// ordered such that the load will get either the value that was stored, or
+/// some later value (which is always clean).
+///
+/// This does not work very well with Compare-And-Swap (CAS) and
+/// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
+/// must store the new shadow before the app operation, and load the shadow
+/// after the app operation. Computers don't work this way. The current
+/// implementation ignores the load aspect of CAS/RMW, always returning a clean
+/// value. It implements the store part as a simple atomic store by storing a
+/// clean shadow.
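+///
+/// Illustrative sketch (not part of this change): an atomic application store
+/// such as
+///   store atomic i32 %v, i32* %p monotonic, align 4
+/// is preceded by a clean-shadow store and upgraded to release ordering
+/// (%shadow_of_p stands for the computed shadow address):
+///   store i32 0, i32* %shadow_of_p
+///   store atomic i32 %v, i32* %p release, align 4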
+
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "msan"
@@ -74,6 +99,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -90,9 +116,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/BlackList.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/SpecialCaseList.h"
using namespace llvm;
@@ -156,6 +182,18 @@ static cl::opt<std::string> ClBlacklistFile("msan-blacklist",
cl::desc("File containing the list of functions where MemorySanitizer "
"should not report bugs"), cl::Hidden);
+// Experimental. Wraps all indirect calls in the instrumented code with
+// a call to the given function. This is needed to help the dynamic
+// helper tool (MSanDR) regain control on transitions between instrumented and
+// non-instrumented code.
+static cl::opt<std::string> ClWrapIndirectCalls("msan-wrap-indirect-calls",
+ cl::desc("Wrap indirect calls with a given function"),
+ cl::Hidden);
+
+static cl::opt<bool> ClWrapIndirectCallsFast("msan-wrap-indirect-calls-fast",
+ cl::desc("Do not wrap indirect calls with target in the same module"),
+ cl::Hidden, cl::init(true));
+
namespace {
/// \brief An instrumentation pass implementing detection of uninitialized
@@ -167,12 +205,12 @@ class MemorySanitizer : public FunctionPass {
public:
MemorySanitizer(bool TrackOrigins = false,
StringRef BlacklistFile = StringRef())
- : FunctionPass(ID),
- TrackOrigins(TrackOrigins || ClTrackOrigins),
- TD(0),
- WarningFn(0),
- BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
- : BlacklistFile) { }
+ : FunctionPass(ID),
+ TrackOrigins(TrackOrigins || ClTrackOrigins),
+ TD(0),
+ WarningFn(0),
+ BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile : BlacklistFile),
+ WrapIndirectCalls(!ClWrapIndirectCalls.empty()) {}
const char *getPassName() const { return "MemorySanitizer"; }
bool runOnFunction(Function &F);
bool doInitialization(Module &M);
@@ -206,13 +244,16 @@ class MemorySanitizer : public FunctionPass {
/// function.
GlobalVariable *OriginTLS;
+ GlobalVariable *MsandrModuleStart;
+ GlobalVariable *MsandrModuleEnd;
+
/// \brief The run-time callback to print a warning.
Value *WarningFn;
/// \brief Run-time helper that copies origin info for a memory range.
Value *MsanCopyOriginFn;
/// \brief Run-time helper that generates a new origin value for a stack
/// allocation.
- Value *MsanSetAllocaOriginFn;
+ Value *MsanSetAllocaOrigin4Fn;
/// \brief Run-time helper that poisons stack on function entry.
Value *MsanPoisonStackFn;
/// \brief MSan runtime replacements for memmove, memcpy and memset.
@@ -228,13 +269,19 @@ class MemorySanitizer : public FunctionPass {
MDNode *ColdCallWeights;
/// \brief Branch weights for origin store.
MDNode *OriginStoreWeights;
- /// \bried Path to blacklist file.
+ /// \brief Path to blacklist file.
SmallString<64> BlacklistFile;
/// \brief The blacklist.
- OwningPtr<BlackList> BL;
+ OwningPtr<SpecialCaseList> BL;
/// \brief An empty volatile inline asm that prevents callback merge.
InlineAsm *EmptyAsm;
+ bool WrapIndirectCalls;
+ /// \brief Run-time wrapper for indirect calls.
+ Value *IndirectCallWrapperFn;
+ // Argument and return type of IndirectCallWrapperFn: void (*f)(void).
+ Type *AnyFunctionPtrTy;
+
friend struct MemorySanitizerVisitor;
friend struct VarArgAMD64Helper;
};
@@ -280,9 +327,9 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
MsanCopyOriginFn = M.getOrInsertFunction(
"__msan_copy_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IntptrTy, NULL);
- MsanSetAllocaOriginFn = M.getOrInsertFunction(
- "__msan_set_alloca_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
- IRB.getInt8PtrTy(), NULL);
+ MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
+ "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
+ IRB.getInt8PtrTy(), IntptrTy, NULL);
MsanPoisonStackFn = M.getOrInsertFunction(
"__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL);
MemmoveFn = M.getOrInsertFunction(
@@ -299,35 +346,53 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
RetvalTLS = new GlobalVariable(
M, ArrayType::get(IRB.getInt64Ty(), 8), false,
GlobalVariable::ExternalLinkage, 0, "__msan_retval_tls", 0,
- GlobalVariable::GeneralDynamicTLSModel);
+ GlobalVariable::InitialExecTLSModel);
RetvalOriginTLS = new GlobalVariable(
M, OriginTy, false, GlobalVariable::ExternalLinkage, 0,
- "__msan_retval_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+ "__msan_retval_origin_tls", 0, GlobalVariable::InitialExecTLSModel);
ParamTLS = new GlobalVariable(
M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
GlobalVariable::ExternalLinkage, 0, "__msan_param_tls", 0,
- GlobalVariable::GeneralDynamicTLSModel);
+ GlobalVariable::InitialExecTLSModel);
ParamOriginTLS = new GlobalVariable(
M, ArrayType::get(OriginTy, 1000), false, GlobalVariable::ExternalLinkage,
- 0, "__msan_param_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+ 0, "__msan_param_origin_tls", 0, GlobalVariable::InitialExecTLSModel);
VAArgTLS = new GlobalVariable(
M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
GlobalVariable::ExternalLinkage, 0, "__msan_va_arg_tls", 0,
- GlobalVariable::GeneralDynamicTLSModel);
+ GlobalVariable::InitialExecTLSModel);
VAArgOverflowSizeTLS = new GlobalVariable(
M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, 0,
"__msan_va_arg_overflow_size_tls", 0,
- GlobalVariable::GeneralDynamicTLSModel);
+ GlobalVariable::InitialExecTLSModel);
OriginTLS = new GlobalVariable(
M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, 0,
- "__msan_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+ "__msan_origin_tls", 0, GlobalVariable::InitialExecTLSModel);
// We insert an empty inline asm after __msan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
/*hasSideEffects=*/true);
+
+ if (WrapIndirectCalls) {
+ AnyFunctionPtrTy =
+ PointerType::getUnqual(FunctionType::get(IRB.getVoidTy(), false));
+ IndirectCallWrapperFn = M.getOrInsertFunction(
+ ClWrapIndirectCalls, AnyFunctionPtrTy, AnyFunctionPtrTy, NULL);
+ }
+
+ if (ClWrapIndirectCallsFast) {
+ MsandrModuleStart = new GlobalVariable(
+ M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage,
+ 0, "__executable_start");
+ MsandrModuleStart->setVisibility(GlobalVariable::HiddenVisibility);
+ MsandrModuleEnd = new GlobalVariable(
+ M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage,
+ 0, "_end");
+ MsandrModuleEnd->setVisibility(GlobalVariable::HiddenVisibility);
+ }
}
/// \brief Module-level initialization.
@@ -337,7 +402,7 @@ bool MemorySanitizer::doInitialization(Module &M) {
TD = getAnalysisIfAvailable<DataLayout>();
if (!TD)
return false;
- BL.reset(new BlackList(BlacklistFile));
+ BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
C = &(M.getContext());
unsigned PtrSize = TD->getPointerSizeInBits(/* AddressSpace */0);
switch (PtrSize) {
@@ -365,11 +430,13 @@ bool MemorySanitizer::doInitialization(Module &M) {
appendToGlobalCtors(M, cast<Function>(M.getOrInsertFunction(
"__msan_init", IRB.getVoidTy(), NULL)), 0);
- new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
- IRB.getInt32(TrackOrigins), "__msan_track_origins");
+ if (TrackOrigins)
+ new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
+ IRB.getInt32(TrackOrigins), "__msan_track_origins");
- new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
- IRB.getInt32(ClKeepGoing), "__msan_keep_going");
+ if (ClKeepGoing)
+ new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
+ IRB.getInt32(ClKeepGoing), "__msan_keep_going");
return true;
}
@@ -420,27 +487,40 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
MemorySanitizer &MS;
SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
ValueMap<Value*, Value*> ShadowMap, OriginMap;
+ OwningPtr<VarArgHelper> VAHelper;
+
+ // The following flags disable parts of MSan instrumentation based on
+ // blacklist contents and command-line options.
bool InsertChecks;
bool LoadShadow;
- OwningPtr<VarArgHelper> VAHelper;
+ bool PoisonStack;
+ bool PoisonUndef;
+ bool CheckReturnValue;
struct ShadowOriginAndInsertPoint {
- Instruction *Shadow;
- Instruction *Origin;
+ Value *Shadow;
+ Value *Origin;
Instruction *OrigIns;
- ShadowOriginAndInsertPoint(Instruction *S, Instruction *O, Instruction *I)
+ ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
: Shadow(S), Origin(O), OrigIns(I) { }
ShadowOriginAndInsertPoint() : Shadow(0), Origin(0), OrigIns(0) { }
};
SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
SmallVector<Instruction*, 16> StoreList;
+ SmallVector<CallSite, 16> IndirectCallList;
MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
: F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
- LoadShadow = InsertChecks =
- !MS.BL->isIn(F) &&
- F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::SanitizeMemory);
+ bool SanitizeFunction = !MS.BL->isIn(F) && F.getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex,
+ Attribute::SanitizeMemory);
+ InsertChecks = SanitizeFunction;
+ LoadShadow = SanitizeFunction;
+ PoisonStack = SanitizeFunction && ClPoisonStack;
+ PoisonUndef = SanitizeFunction && ClPoisonUndef;
+ // FIXME: Consider using SpecialCaseList to specify a list of functions that
+ // must always return fully initialized values. For now, we hardcode "main".
+ CheckReturnValue = SanitizeFunction && (F.getName() == "main");
DEBUG(if (!InsertChecks)
dbgs() << "MemorySanitizer is not inserting checks into '"
@@ -454,7 +534,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(&I);
Value *Val = I.getValueOperand();
Value *Addr = I.getPointerOperand();
- Value *Shadow = getShadow(Val);
+ Value *Shadow = I.isAtomic() ? getCleanShadow(Val) : getShadow(Val);
Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
StoreInst *NewSI =
@@ -463,7 +543,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
(void)NewSI;
if (ClCheckAccessAddress)
- insertCheck(Addr, &I);
+ insertShadowCheck(Addr, &I);
+
+ if (I.isAtomic())
+ I.setOrdering(addReleaseOrdering(I.getOrdering()));
if (MS.TrackOrigins) {
unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
@@ -473,11 +556,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
} else {
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
- Constant *Cst = dyn_cast_or_null<Constant>(ConvertedShadow);
// TODO(eugenis): handle non-zero constant shadow by inserting an
// unconditional check (can not simply fail compilation as this could
// be in the dead code).
- if (Cst)
+ if (isa<Constant>(ConvertedShadow))
continue;
Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
@@ -495,12 +577,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void materializeChecks() {
for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) {
- Instruction *Shadow = InstrumentationList[i].Shadow;
+ Value *Shadow = InstrumentationList[i].Shadow;
Instruction *OrigIns = InstrumentationList[i].OrigIns;
IRBuilder<> IRB(OrigIns);
DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n");
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
+ // See the comment in materializeStores().
+ if (isa<Constant>(ConvertedShadow))
+ continue;
Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
getCleanShadow(ConvertedShadow), "_mscmp");
Instruction *CheckTerm =
@@ -510,7 +595,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.SetInsertPoint(CheckTerm);
if (MS.TrackOrigins) {
- Instruction *Origin = InstrumentationList[i].Origin;
+ Value *Origin = InstrumentationList[i].Origin;
IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0),
MS.OriginTLS);
}
@@ -522,6 +607,48 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << "DONE:\n" << F);
}
+ void materializeIndirectCalls() {
+ for (size_t i = 0, n = IndirectCallList.size(); i < n; i++) {
+ CallSite CS = IndirectCallList[i];
+ Instruction *I = CS.getInstruction();
+ BasicBlock *B = I->getParent();
+ IRBuilder<> IRB(I);
+ Value *Fn0 = CS.getCalledValue();
+ Value *Fn = IRB.CreateBitCast(Fn0, MS.AnyFunctionPtrTy);
+
+ if (ClWrapIndirectCallsFast) {
+ // Check that the call target is inside this module's limits.
+ Value *Start =
+ IRB.CreateBitCast(MS.MsandrModuleStart, MS.AnyFunctionPtrTy);
+ Value *End = IRB.CreateBitCast(MS.MsandrModuleEnd, MS.AnyFunctionPtrTy);
+
+ Value *NotInThisModule = IRB.CreateOr(IRB.CreateICmpULT(Fn, Start),
+ IRB.CreateICmpUGE(Fn, End));
+
+ PHINode *NewFnPhi =
+ IRB.CreatePHI(Fn0->getType(), 2, "msandr.indirect_target");
+
+ Instruction *CheckTerm = SplitBlockAndInsertIfThen(
+ cast<Instruction>(NotInThisModule),
+ /* Unreachable */ false, MS.ColdCallWeights);
+
+ IRB.SetInsertPoint(CheckTerm);
+ // Slow path: call wrapper function to possibly transform the call
+ // target.
+ Value *NewFn = IRB.CreateBitCast(
+ IRB.CreateCall(MS.IndirectCallWrapperFn, Fn), Fn0->getType());
+
+ NewFnPhi->addIncoming(Fn0, B);
+ NewFnPhi->addIncoming(NewFn, dyn_cast<Instruction>(NewFn)->getParent());
+ CS.setCalledFunction(NewFnPhi);
+ } else {
+ Value *NewFn = IRB.CreateBitCast(
+ IRB.CreateCall(MS.IndirectCallWrapperFn, Fn), Fn0->getType());
+ CS.setCalledFunction(NewFn);
+ }
+ }
+ }
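+
+ // Editorial note (not part of this change): with the fast path enabled, only
+ // targets outside [__executable_start, _end) reach the wrapper; in-module
+ // targets keep the original callee via the PHI node above.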
+
/// \brief Add MemorySanitizer instrumentation to a function.
bool runOnFunction() {
MS.initializeCallbacks(*F.getParent());
@@ -564,6 +691,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Insert shadow value checks.
materializeChecks();
+ // Wrap indirect calls.
+ materializeIndirectCalls();
+
return true;
}
@@ -741,7 +871,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return Shadow;
}
if (UndefValue *U = dyn_cast<UndefValue>(V)) {
- Value *AllOnes = ClPoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
+ Value *AllOnes = PoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
(void)U;
return AllOnes;
@@ -768,14 +898,21 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (AI->hasByValAttr()) {
// ByVal pointer itself has clean shadow. We copy the actual
// argument shadow to the underlying memory.
+ // Figure out maximal valid memcpy alignment.
+ unsigned ArgAlign = AI->getParamAlignment();
+ if (ArgAlign == 0) {
+ Type *EltType = A->getType()->getPointerElementType();
+ ArgAlign = MS.TD->getABITypeAlignment(EltType);
+ }
+ unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
Value *Cpy = EntryIRB.CreateMemCpy(
- getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB),
- Base, Size, AI->getParamAlignment());
+ getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB), Base, Size,
+ CopyAlign);
DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n");
(void)Cpy;
*ShadowPtr = getCleanShadow(V);
} else {
- *ShadowPtr = EntryIRB.CreateLoad(Base);
+ *ShadowPtr = EntryIRB.CreateAlignedLoad(Base, kShadowTLSAlignment);
}
DEBUG(dbgs() << " ARG: " << *AI << " ==> " <<
**ShadowPtr << "\n");
@@ -784,7 +921,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(A, EntryIRB.CreateLoad(OriginPtr));
}
}
- ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
+ ArgOffset += DataLayout::RoundUpAlignment(Size, kShadowTLSAlignment);
}
assert(*ShadowPtr && "Could not find shadow for an argument");
return *ShadowPtr;
@@ -820,20 +957,63 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Remember the place where a shadow check should be inserted.
///
/// This location will be later instrumented with a check that will print a
- /// UMR warning in runtime if the value is not fully defined.
- void insertCheck(Value *Val, Instruction *OrigIns) {
- assert(Val);
+ /// UMR warning in runtime if the shadow value is not 0.
+ void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
+ assert(Shadow);
if (!InsertChecks) return;
- Instruction *Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
- if (!Shadow) return;
#ifndef NDEBUG
Type *ShadowTy = Shadow->getType();
assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
"Can only insert checks for integer and vector shadow types");
#endif
- Instruction *Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
InstrumentationList.push_back(
- ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
+ ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
+ }
+
+ /// \brief Remember the place where a shadow check should be inserted.
+ ///
+ /// This location will be later instrumented with a check that will print a
+ /// UMR warning in runtime if the value is not fully defined.
+ void insertShadowCheck(Value *Val, Instruction *OrigIns) {
+ assert(Val);
+ Instruction *Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
+ if (!Shadow) return;
+ Instruction *Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
+ insertShadowCheck(Shadow, Origin, OrigIns);
+ }
+
+ AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
+ switch (a) {
+ case NotAtomic:
+ return NotAtomic;
+ case Unordered:
+ case Monotonic:
+ case Release:
+ return Release;
+ case Acquire:
+ case AcquireRelease:
+ return AcquireRelease;
+ case SequentiallyConsistent:
+ return SequentiallyConsistent;
+ }
+ llvm_unreachable("Unknown ordering");
+ }
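+
+ // For example (illustrative): a monotonic application store becomes release,
+ // and a monotonic load becomes acquire, keeping the shadow/app accesses
+ // ordered as described in the file header comment.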
+
+ AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
+ switch (a) {
+ case NotAtomic:
+ return NotAtomic;
+ case Unordered:
+ case Monotonic:
+ case Acquire:
+ return Acquire;
+ case Release:
+ case AcquireRelease:
+ return AcquireRelease;
+ case SequentiallyConsistent:
+ return SequentiallyConsistent;
+ }
+ llvm_unreachable("Unknown ordering");
}
// ------------------- Visitors.
@@ -844,7 +1024,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// Optionally, checks that the load address is fully defined.
void visitLoadInst(LoadInst &I) {
assert(I.getType()->isSized() && "Load type must have size");
- IRBuilder<> IRB(&I);
+ IRBuilder<> IRB(I.getNextNode());
Type *ShadowTy = getShadowTy(&I);
Value *Addr = I.getPointerOperand();
if (LoadShadow) {
@@ -856,7 +1036,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
if (ClCheckAccessAddress)
- insertCheck(I.getPointerOperand(), &I);
+ insertShadowCheck(I.getPointerOperand(), &I);
+
+ if (I.isAtomic())
+ I.setOrdering(addAcquireOrdering(I.getOrdering()));
if (MS.TrackOrigins) {
if (LoadShadow) {
@@ -877,9 +1060,40 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
StoreList.push_back(&I);
}
+ void handleCASOrRMW(Instruction &I) {
+ assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
+
+ IRBuilder<> IRB(&I);
+ Value *Addr = I.getOperand(0);
+ Value *ShadowPtr = getShadowPtr(Addr, I.getType(), IRB);
+
+ if (ClCheckAccessAddress)
+ insertShadowCheck(Addr, &I);
+
+ // Only test the conditional argument of the cmpxchg instruction.
+ // The other argument can potentially be uninitialized, but we cannot
+ // detect this situation reliably without possible false positives.
+ if (isa<AtomicCmpXchgInst>(I))
+ insertShadowCheck(I.getOperand(1), &I);
+
+ IRB.CreateStore(getCleanShadow(&I), ShadowPtr);
+
+ setShadow(&I, getCleanShadow(&I));
+ }
+
+ void visitAtomicRMWInst(AtomicRMWInst &I) {
+ handleCASOrRMW(I);
+ I.setOrdering(addReleaseOrdering(I.getOrdering()));
+ }
+
+ void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
+ handleCASOrRMW(I);
+ I.setOrdering(addReleaseOrdering(I.getOrdering()));
+ }
+
// Vector manipulation.
void visitExtractElementInst(ExtractElementInst &I) {
- insertCheck(I.getOperand(1), &I);
+ insertShadowCheck(I.getOperand(1), &I);
IRBuilder<> IRB(&I);
setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
"_msprop"));
@@ -887,7 +1101,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
void visitInsertElementInst(InsertElementInst &I) {
- insertCheck(I.getOperand(2), &I);
+ insertShadowCheck(I.getOperand(2), &I);
IRBuilder<> IRB(&I);
setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
I.getOperand(2), "_msprop"));
@@ -895,7 +1109,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
void visitShuffleVectorInst(ShuffleVectorInst &I) {
- insertCheck(I.getOperand(2), &I);
+ insertShadowCheck(I.getOperand(2), &I);
IRBuilder<> IRB(&I);
setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
I.getOperand(2), "_msprop"));
@@ -1094,18 +1308,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Cast between two shadow types, extending or truncating as
/// necessary.
- Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy) {
+ Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
+ bool Signed = false) {
Type *srcTy = V->getType();
if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
- return IRB.CreateIntCast(V, dstTy, false);
+ return IRB.CreateIntCast(V, dstTy, Signed);
if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
dstTy->getVectorNumElements() == srcTy->getVectorNumElements())
- return IRB.CreateIntCast(V, dstTy, false);
+ return IRB.CreateIntCast(V, dstTy, Signed);
size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
Value *V2 =
- IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), false);
+ IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
return IRB.CreateBitCast(V2, dstTy);
// TODO: handle struct types.
}
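// Note on the new Signed flag: casting an i1 shadow to i32 with Signed=true
// yields 0 or -1 (an all-ones poison mask), whereas Signed=false yields 0 or
// 1; the select instrumentation below relies on the all-ones form.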
@@ -1130,7 +1345,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void handleDiv(Instruction &I) {
IRBuilder<> IRB(&I);
// Strict on the second argument.
- insertCheck(I.getOperand(1), &I);
+ insertShadowCheck(I.getOperand(1), &I);
setShadow(&I, getShadow(&I, 0));
setOrigin(&I, getOrigin(&I, 0));
}
@@ -1413,7 +1628,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.CreateAlignedStore(Shadow, ShadowPtr, 1);
if (ClCheckAccessAddress)
- insertCheck(Addr, &I);
+ insertShadowCheck(Addr, &I);
// FIXME: use ClStoreCleanOrigin
// FIXME: factor out common code from materializeStores
@@ -1440,9 +1655,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setShadow(&I, getCleanShadow(&I));
}
-
if (ClCheckAccessAddress)
- insertCheck(Addr, &I);
+ insertShadowCheck(Addr, &I);
if (MS.TrackOrigins) {
if (LoadShadow)
@@ -1539,11 +1753,119 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getOrigin(Op));
}
+ // \brief Instrument vector convert intrinsic.
+ //
+ // This function instruments intrinsics like cvtsi2ss:
+ // %Out = int_xxx_cvtyyy(%ConvertOp)
+ // or
+ // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
+ // The intrinsic converts \p NumUsedElements elements of \p ConvertOp to the
+ // same number of \p Out elements, and (if it has 2 arguments) copies the
+ // rest of the elements from \p CopyOp.
+ // In most cases the conversion involves a floating-point value, which may
+ // trigger a hardware exception when not fully initialized. For this reason
+ // we require \p ConvertOp[0:NumUsedElements] to be fully initialized and
+ // trap otherwise.
+ // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
+ // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
+ // return a fully initialized value.
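+ // For example (sketch): for %Out = cvtsd2ss(%CopyOp, %ConvertOp) with
+ // NumUsedElements == 1, the code below requires shadow(%ConvertOp[0]) to be
+ // zero (reporting an error otherwise) and sets shadow(%Out) to
+ // shadow(%CopyOp) with element 0 zeroed.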
+ void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements) {
+ IRBuilder<> IRB(&I);
+ Value *CopyOp, *ConvertOp;
+
+ switch (I.getNumArgOperands()) {
+ case 2:
+ CopyOp = I.getArgOperand(0);
+ ConvertOp = I.getArgOperand(1);
+ break;
+ case 1:
+ ConvertOp = I.getArgOperand(0);
+ CopyOp = NULL;
+ break;
+ default:
+ llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
+ }
+
+ // The first *NumUsedElements* elements of ConvertOp are converted to the
+ // same number of output elements. The rest of the output is copied from
+ // CopyOp, or (if not available) filled with zeroes.
+ // Combine shadow for elements of ConvertOp that are used in this operation,
+ // and insert a check.
+ // FIXME: consider propagating shadow of ConvertOp, at least in the case of
+ // int->any conversion.
+ Value *ConvertShadow = getShadow(ConvertOp);
+ Value *AggShadow = 0;
+ if (ConvertOp->getType()->isVectorTy()) {
+ AggShadow = IRB.CreateExtractElement(
+ ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
+ for (int i = 1; i < NumUsedElements; ++i) {
+ Value *MoreShadow = IRB.CreateExtractElement(
+ ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
+ AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
+ }
+ } else {
+ AggShadow = ConvertShadow;
+ }
+ assert(AggShadow->getType()->isIntegerTy());
+ insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
+
+ // Build result shadow by zero-filling parts of CopyOp shadow that come from
+ // ConvertOp.
+ if (CopyOp) {
+ assert(CopyOp->getType() == I.getType());
+ assert(CopyOp->getType()->isVectorTy());
+ Value *ResultShadow = getShadow(CopyOp);
+ Type *EltTy = ResultShadow->getType()->getVectorElementType();
+ for (int i = 0; i < NumUsedElements; ++i) {
+ ResultShadow = IRB.CreateInsertElement(
+ ResultShadow, ConstantInt::getNullValue(EltTy),
+ ConstantInt::get(IRB.getInt32Ty(), i));
+ }
+ setShadow(&I, ResultShadow);
+ setOrigin(&I, getOrigin(CopyOp));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ }
+ }
+
void visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
case llvm::Intrinsic::bswap:
handleBswap(I);
break;
+ case llvm::Intrinsic::x86_avx512_cvtsd2usi64:
+ case llvm::Intrinsic::x86_avx512_cvtsd2usi:
+ case llvm::Intrinsic::x86_avx512_cvtss2usi64:
+ case llvm::Intrinsic::x86_avx512_cvtss2usi:
+ case llvm::Intrinsic::x86_avx512_cvttss2usi64:
+ case llvm::Intrinsic::x86_avx512_cvttss2usi:
+ case llvm::Intrinsic::x86_avx512_cvttsd2usi64:
+ case llvm::Intrinsic::x86_avx512_cvttsd2usi:
+ case llvm::Intrinsic::x86_avx512_cvtusi2sd:
+ case llvm::Intrinsic::x86_avx512_cvtusi2ss:
+ case llvm::Intrinsic::x86_avx512_cvtusi642sd:
+ case llvm::Intrinsic::x86_avx512_cvtusi642ss:
+ case llvm::Intrinsic::x86_sse2_cvtsd2si64:
+ case llvm::Intrinsic::x86_sse2_cvtsd2si:
+ case llvm::Intrinsic::x86_sse2_cvtsd2ss:
+ case llvm::Intrinsic::x86_sse2_cvtsi2sd:
+ case llvm::Intrinsic::x86_sse2_cvtsi642sd:
+ case llvm::Intrinsic::x86_sse2_cvtss2sd:
+ case llvm::Intrinsic::x86_sse2_cvttsd2si64:
+ case llvm::Intrinsic::x86_sse2_cvttsd2si:
+ case llvm::Intrinsic::x86_sse_cvtsi2ss:
+ case llvm::Intrinsic::x86_sse_cvtsi642ss:
+ case llvm::Intrinsic::x86_sse_cvtss2si64:
+ case llvm::Intrinsic::x86_sse_cvtss2si:
+ case llvm::Intrinsic::x86_sse_cvttss2si64:
+ case llvm::Intrinsic::x86_sse_cvttss2si:
+ handleVectorConvertIntrinsic(I, 1);
+ break;
+ case llvm::Intrinsic::x86_sse2_cvtdq2pd:
+ case llvm::Intrinsic::x86_sse2_cvtps2pd:
+ case llvm::Intrinsic::x86_sse_cvtps2pi:
+ case llvm::Intrinsic::x86_sse_cvttps2pi:
+ handleVectorConvertIntrinsic(I, 2);
+ break;
default:
if (!handleUnknownIntrinsic(I))
visitInstruction(I);
@@ -1589,6 +1911,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
IRBuilder<> IRB(&I);
+
+ if (MS.WrapIndirectCalls && !CS.getCalledFunction())
+ IndirectCallList.push_back(CS);
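+ // Only call sites without a statically known callee are queued; the queued
+ // sites are presumably rewritten later to go through the indirect-call
+ // wrapper.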
+
unsigned ArgOffset = 0;
DEBUG(dbgs() << " CallSite: " << I << "\n");
for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
@@ -1632,7 +1958,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << " done with call args\n");
FunctionType *FT =
- cast<FunctionType>(CS.getCalledValue()->getType()-> getContainedType(0));
+ cast<FunctionType>(CS.getCalledValue()->getType()->getContainedType(0));
if (FT->isVarArg()) {
VAHelper->visitCallSite(CS, IRB);
}
@@ -1671,12 +1997,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void visitReturnInst(ReturnInst &I) {
IRBuilder<> IRB(&I);
- if (Value *RetVal = I.getReturnValue()) {
- // Set the shadow for the RetVal.
+ Value *RetVal = I.getReturnValue();
+ if (!RetVal) return;
+ Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
+ if (CheckReturnValue) {
+ insertShadowCheck(RetVal, &I);
+ Value *Shadow = getCleanShadow(RetVal);
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
+ } else {
Value *Shadow = getShadow(RetVal);
- Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
- DEBUG(dbgs() << "Return: " << *Shadow << "\n" << *ShadowPtr << "\n");
IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
+ // FIXME: make it conditional if ClStoreCleanOrigin==0
if (MS.TrackOrigins)
IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
}
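// Sketch of the two modes: with CheckReturnValue the poison check fires at
// the "ret" itself and a clean shadow is written to the retval TLS slot;
// otherwise the callee's shadow is passed to the caller through that slot.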
@@ -1694,20 +2025,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void visitAllocaInst(AllocaInst &I) {
setShadow(&I, getCleanShadow(&I));
- if (!ClPoisonStack) return;
IRBuilder<> IRB(I.getNextNode());
uint64_t Size = MS.TD->getTypeAllocSize(I.getAllocatedType());
- if (ClPoisonStackWithCall) {
+ if (PoisonStack && ClPoisonStackWithCall) {
IRB.CreateCall2(MS.MsanPoisonStackFn,
IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
ConstantInt::get(MS.IntptrTy, Size));
} else {
Value *ShadowBase = getShadowPtr(&I, Type::getInt8PtrTy(*MS.C), IRB);
- IRB.CreateMemSet(ShadowBase, IRB.getInt8(ClPoisonStackPattern),
- Size, I.getAlignment());
+ Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
+ IRB.CreateMemSet(ShadowBase, PoisonValue, Size, I.getAlignment());
}
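// E.g. (sketch): for "alloca i64" with stack poisoning enabled this emits a
// memset of 8 pattern bytes (0xff, assuming the usual ClPoisonStackPattern
// default) over the alloca's shadow; with poisoning disabled the same memset
// writes zeroes, marking the slot initialized.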
- if (MS.TrackOrigins) {
+ if (PoisonStack && MS.TrackOrigins) {
setOrigin(&I, getCleanOrigin());
SmallString<2048> StackDescriptionStorage;
raw_svector_ostream StackDescription(StackDescriptionStorage);
@@ -1720,18 +2050,34 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Descr =
createPrivateNonConstGlobalForString(*F.getParent(),
StackDescription.str());
- IRB.CreateCall3(MS.MsanSetAllocaOriginFn,
+
+ IRB.CreateCall4(MS.MsanSetAllocaOrigin4Fn,
IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
ConstantInt::get(MS.IntptrTy, Size),
- IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()));
+ IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(&F, MS.IntptrTy));
}
}
void visitSelectInst(SelectInst& I) {
IRBuilder<> IRB(&I);
- setShadow(&I, IRB.CreateSelect(I.getCondition(),
- getShadow(I.getTrueValue()), getShadow(I.getFalseValue()),
- "_msprop"));
+ // a = select b, c, d
+ Value *S = IRB.CreateSelect(I.getCondition(), getShadow(I.getTrueValue()),
+ getShadow(I.getFalseValue()));
+ if (I.getType()->isAggregateType()) {
+ // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
+ // an extra "select". This results in much more compact IR.
+ // Sa = select Sb, poisoned, (select b, Sc, Sd)
+ S = IRB.CreateSelect(getShadow(I.getCondition()),
+ getPoisonedShadow(getShadowTy(I.getType())), S,
+ "_msprop_select_agg");
+ } else {
+ // Sa = (sext Sb) | (select b, Sc, Sd)
+ S = IRB.CreateOr(S, CreateShadowCast(IRB, getShadow(I.getCondition()),
+ S->getType(), true),
+ "_msprop_select");
+ }
+ setShadow(&I, S);
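+ // E.g. (sketch): for "%a = select i1 %b, i32 %c, i32 %d" this computes
+ // %sa = (sext i1 %sb to i32) | (select i1 %b, i32 %sc, i32 %sd), so a
+ // poisoned condition poisons every bit of the result.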
if (MS.TrackOrigins) {
// Origins are always i32, so any vector conditions must be flattened.
// FIXME: consider tracking vector origins for app vectors?
@@ -1766,7 +2112,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
DEBUG(dbgs() << " ResShadow: " << *ResShadow << "\n");
setShadow(&I, ResShadow);
- setOrigin(&I, getCleanOrigin());
+ setOriginForNaryOp(I);
}
void visitInsertValueInst(InsertValueInst &I) {
@@ -1779,7 +2125,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
DEBUG(dbgs() << " Res: " << *Res << "\n");
setShadow(&I, Res);
- setOrigin(&I, getCleanOrigin());
+ setOriginForNaryOp(I);
}
void dumpInst(Instruction &I) {
@@ -1802,7 +2148,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
dumpInst(I);
DEBUG(dbgs() << "DEFAULT: " << I << "\n");
for (size_t i = 0, n = I.getNumOperands(); i < n; i++)
- insertCheck(I.getOperand(i), &I);
+ insertShadowCheck(I.getOperand(i), &I);
setShadow(&I, getCleanShadow(&I));
setOrigin(&I, getCleanOrigin());
}
@@ -1956,16 +2302,35 @@ struct VarArgAMD64Helper : public VarArgHelper {
Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
Value *OverflowArgAreaShadowPtr =
MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB);
- Value *SrcPtr =
- getShadowPtrForVAArgument(VAArgTLSCopy, IRB, AMD64FpEndOffset);
+ Value *SrcPtr = IRB.CreateConstGEP1_32(VAArgTLSCopy, AMD64FpEndOffset);
IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16);
}
}
};
-VarArgHelper* CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
+/// \brief A no-op implementation of VarArgHelper.
+struct VarArgNoOpHelper : public VarArgHelper {
+ VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
+ MemorySanitizerVisitor &MSV) {}
+
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) {}
+
+ void visitVAStartInst(VAStartInst &I) {}
+
+ void visitVACopyInst(VACopyInst &I) {}
+
+ void finalizeInstrumentation() {}
+};
+
+VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
MemorySanitizerVisitor &Visitor) {
- return new VarArgAMD64Helper(Func, Msan, Visitor);
+ // VarArg handling is only implemented on AMD64. False positives are possible
+ // on other platforms.
+ llvm::Triple TargetTriple(Func.getParent()->getTargetTriple());
+ if (TargetTriple.getArch() == llvm::Triple::x86_64)
+ return new VarArgAMD64Helper(Func, Msan, Visitor);
+ else
+ return new VarArgNoOpHelper(Func, Msan, Visitor);
}
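// Consequence of the no-op fallback: on non-x86_64 targets va_arg shadow is
// never populated, which is the source of the false positives noted above.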
} // namespace
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
deleted file mode 100644
index b45aef65..0000000
--- a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ /dev/null
@@ -1,225 +0,0 @@
-//===- OptimalEdgeProfiling.cpp - Insert counters for opt. edge profiling -===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass instruments the specified program with counters for edge profiling.
-// Edge profiling can give a reasonable approximation of the hot paths through a
-// program, and is used for a wide variety of program transformations.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "insert-optimal-edge-profiling"
-#include "llvm/Transforms/Instrumentation.h"
-#include "MaximumSpanningTree.h"
-#include "ProfilingUtils.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-using namespace llvm;
-
-STATISTIC(NumEdgesInserted, "The # of edges inserted.");
-
-namespace {
- class OptimalEdgeProfiler : public ModulePass {
- bool runOnModule(Module &M);
- public:
- static char ID; // Pass identification, replacement for typeid
- OptimalEdgeProfiler() : ModulePass(ID) {
- initializeOptimalEdgeProfilerPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(ProfileEstimatorPassID);
- AU.addRequired<ProfileInfo>();
- }
-
- virtual const char *getPassName() const {
- return "Optimal Edge Profiler";
- }
- };
-}
-
-char OptimalEdgeProfiler::ID = 0;
-INITIALIZE_PASS_BEGIN(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
- "Insert optimal instrumentation for edge profiling",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(ProfileEstimatorPass)
-INITIALIZE_AG_DEPENDENCY(ProfileInfo)
-INITIALIZE_PASS_END(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
- "Insert optimal instrumentation for edge profiling",
- false, false)
-
-ModulePass *llvm::createOptimalEdgeProfilerPass() {
- return new OptimalEdgeProfiler();
-}
-
-inline static void printEdgeCounter(ProfileInfo::Edge e,
- BasicBlock* b,
- unsigned i) {
- DEBUG(dbgs() << "--Edge Counter for " << (e) << " in " \
- << ((b)?(b)->getName():"0") << " (# " << (i) << ")\n");
-}
-
-bool OptimalEdgeProfiler::runOnModule(Module &M) {
- Function *Main = M.getFunction("main");
- if (Main == 0) {
- errs() << "WARNING: cannot insert edge profiling into a module"
- << " with no main function!\n";
- return false; // No main, no instrumentation!
- }
-
- // NumEdges counts all the edges that may be instrumented. Later on it is
- // decided which edges to actually instrument, to achieve optimal profiling.
- // For the entry block a virtual edge (0,entry) is reserved, for each block
- // with no successors an edge (BB,0) is reserved. These edges are necessary
- // to calculate a truly optimal maximum spanning tree and thus an optimal
- // instrumentation.
- unsigned NumEdges = 0;
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- // Reserve space for (0,entry) edge.
- ++NumEdges;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- // Keep track of which blocks need to be instrumented. We don't want to
- // instrument blocks that are added as the result of breaking critical
- // edges!
- if (BB->getTerminator()->getNumSuccessors() == 0) {
- // Reserve space for (BB,0) edge.
- ++NumEdges;
- } else {
- NumEdges += BB->getTerminator()->getNumSuccessors();
- }
- }
- }
-
- // In the profiling output a counter for each edge is reserved, but only a
- // few are used. This is done to be able to read the profile back in without
- // calculating the maximum spanning tree again; instead, each edge counter
- // that is not used is initialised with -1 to signal that this edge counter
- // has to be calculated from other edge counters when reading the profile
- // info back in.
-
- Type *Int32 = Type::getInt32Ty(M.getContext());
- ArrayType *ATy = ArrayType::get(Int32, NumEdges);
- GlobalVariable *Counters =
- new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(ATy), "OptEdgeProfCounters");
- NumEdgesInserted = 0;
-
- std::vector<Constant*> Initializer(NumEdges);
- Constant *Zero = ConstantInt::get(Int32, 0);
- Constant *Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);
-
- // Instrument all of the edges not in MST...
- unsigned i = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Working on " << F->getName() << "\n");
-
- // Calculate a Maximum Spanning Tree with the edge weights determined by
- // ProfileEstimator. ProfileEstimator also assigns weights to the virtual
- // edges (0,entry) and (BB,0) (for blocks with no successors), and these
- // edges also participate in the maximum spanning tree calculation.
- // The third parameter of MaximumSpanningTree() causes it to return not the
- // actual MST but the edges that are _not_ in the MST.
-
- ProfileInfo::EdgeWeights ECs =
- getAnalysis<ProfileInfo>(*F).getEdgeWeights(F);
- std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
- MaximumSpanningTree<BasicBlock> MST(EdgeVector);
- std::stable_sort(MST.begin(), MST.end());
-
- // Check whether (0,entry) is in the MST. If not, instrument the edge
- // (IncrementCounterInBlock()) and set the counter initially to zero; if
- // the edge is in the MST, the counter is initialised to -1.
-
- BasicBlock *entry = &(F->getEntryBlock());
- ProfileInfo::Edge edge = ProfileInfo::getEdge(0, entry);
- if (!std::binary_search(MST.begin(), MST.end(), edge)) {
- printEdgeCounter(edge, entry, i);
- IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted;
- Initializer[i++] = (Zero);
- } else {
- Initializer[i++] = (Uncounted);
- }
-
- // InsertedBlocks contains all blocks that were inserted for splitting an
- // edge; these blocks do not have to be instrumented.
- DenseSet<BasicBlock*> InsertedBlocks;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- // Check if block was not inserted and thus does not have to be
- // instrumented.
- if (InsertedBlocks.count(BB)) continue;
-
- // Okay, we have to add a counter of each outgoing edge not in MST. If
- // the outgoing edge is not critical don't split it, just insert the
- // counter in the source or destination of the edge. Also, if the block
- // has no successors, the virtual edge (BB,0) is processed.
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 0) {
- ProfileInfo::Edge edge = ProfileInfo::getEdge(BB, 0);
- if (!std::binary_search(MST.begin(), MST.end(), edge)) {
- printEdgeCounter(edge, BB, i);
- IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
- Initializer[i++] = (Zero);
- } else {
- Initializer[i++] = (Uncounted);
- }
- }
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- BasicBlock *Succ = TI->getSuccessor(s);
- ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,Succ);
- if (!std::binary_search(MST.begin(), MST.end(), edge)) {
-
- // If the edge is critical, split it.
- bool wasInserted = SplitCriticalEdge(TI, s, this);
- Succ = TI->getSuccessor(s);
- if (wasInserted)
- InsertedBlocks.insert(Succ);
-
- // Okay, we are guaranteed that the edge is no longer critical. If
- // we only have a single successor, insert the counter in this block,
- // otherwise insert it in the successor block.
- if (TI->getNumSuccessors() == 1) {
- // Insert counter at the start of the block
- printEdgeCounter(edge, BB, i);
- IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
- } else {
- // Insert counter at the start of the block
- printEdgeCounter(edge, Succ, i);
- IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted;
- }
- Initializer[i++] = (Zero);
- } else {
- Initializer[i++] = (Uncounted);
- }
- }
- }
- }
-
- // Check if the number of edges counted at first was the number of edges we
- // considered for instrumentation.
- assert(i == NumEdges && "the number of edges in counting array is wrong");
-
- // Assign the now completely defined initialiser to the array.
- Constant *init = ConstantArray::get(ATy, Initializer);
- Counters->setInitializer(init);
-
- // Add the initialization call to main.
- InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters);
- return true;
-}
-
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
deleted file mode 100644
index 7de7326..0000000
--- a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ /dev/null
@@ -1,1424 +0,0 @@
-//===- PathProfiling.cpp - Inserts counters for path profiling ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass instruments functions for Ball-Larus path profiling. Ball-Larus
-// profiling converts the CFG into a DAG by replacing backedges with edges
-// from entry to the start block and from the end block to exit. The paths
- // along the new DAG are enumerated, i.e. each path is given a path number.
-// Edges are instrumented to increment the path number register, such that the
-// path number register will equal the path number of the path taken at the
-// exit.
-//
-// This file defines classes for building a CFG for use with different stages
-// in the Ball-Larus path profiling instrumentation [Ball96]. The
-// requirements are formatting the llvm CFG into the Ball-Larus DAG, path
-// numbering, finding a spanning tree, moving increments from the spanning
-// tree to chords.
-//
-// Terms:
-// DAG - Directed Acyclic Graph.
-// Ball-Larus DAG - A CFG with an entry node, an exit node, and backedges
-// removed in the following manner. For every backedge
-// v->w, insert edge ENTRY->w and edge v->EXIT.
-// Path Number - The number corresponding to a specific path through a
-// Ball-Larus DAG.
-// Spanning Tree - A subgraph, S, is a spanning tree if S covers all
-// vertices and is a tree.
-// Chord - An edge not in the spanning tree.
-//
-// [Ball96]
-// T. Ball and J. R. Larus. "Efficient Path Profiling."
-// International Symposium on Microarchitecture, pages 46-57, 1996.
-// http://portal.acm.org/citation.cfm?id=243857
-//
-// [Ball94]
-// Thomas Ball. "Efficiently Counting Program Events with Support for
-// On-line queries."
- // ACM Transactions on Programming Languages and Systems, Vol 16, No 5,
-// September 1994, Pages 1399-1410.
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "insert-path-profiling"
-
-#include "llvm/Transforms/Instrumentation.h"
-#include "ProfilingUtils.h"
-#include "llvm/Analysis/PathNumbering.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/TypeBuilder.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <vector>
-
-#define HASH_THRESHHOLD 100000
-
-using namespace llvm;
-
-namespace {
-class BLInstrumentationNode;
-class BLInstrumentationEdge;
-class BLInstrumentationDag;
-
-// ---------------------------------------------------------------------------
- // BLInstrumentationNode extends BallLarusNode with members used by the
- // instrumentation algorithms.
-// ---------------------------------------------------------------------------
-class BLInstrumentationNode : public BallLarusNode {
-public:
- // Creates a new BLInstrumentationNode from a BasicBlock.
- BLInstrumentationNode(BasicBlock* BB);
-
- // Gets/sets the Value corresponding to the pathNumber register,
- // constant or phinode. Used by the instrumentation code to remember
- // path number Values.
- Value* getStartingPathNumber();
- void setStartingPathNumber(Value* pathNumber);
-
- Value* getEndingPathNumber();
- void setEndingPathNumber(Value* pathNumber);
-
- // Get/set the PHINode Instruction for this node.
- PHINode* getPathPHI();
- void setPathPHI(PHINode* pathPHI);
-
-private:
-
- Value* _startingPathNumber; // The Value for the starting pathNumber.
- Value* _endingPathNumber; // The Value for the ending pathNumber.
- PHINode* _pathPHI; // The PHINode for current pathNumber.
-};
-
-// --------------------------------------------------------------------------
-// BLInstrumentationEdge extends BallLarusEdge with data about the
-// instrumentation that will end up on each edge.
-// --------------------------------------------------------------------------
-class BLInstrumentationEdge : public BallLarusEdge {
-public:
- BLInstrumentationEdge(BLInstrumentationNode* source,
- BLInstrumentationNode* target);
-
- // Sets the target node of this edge. Required to split edges.
- void setTarget(BallLarusNode* node);
-
- // Get/set whether edge is in the spanning tree.
- bool isInSpanningTree() const;
- void setIsInSpanningTree(bool isInSpanningTree);
-
- // Get/set whether this edge will be instrumented with a path number
- // initialization.
- bool isInitialization() const;
- void setIsInitialization(bool isInitialization);
-
- // Get/set whether this edge will be instrumented with a path counter
- // increment. Notice this is incrementing the path counter
- // corresponding to the path number register. The path number
- // increment is determined by getIncrement().
- bool isCounterIncrement() const;
- void setIsCounterIncrement(bool isCounterIncrement);
-
- // Get/set the path number increment that this edge will be instrumented
- // with. This is distinct from the path counter increment and the
- // weight. The counter increment counts the number of executions of
- // some path, whereas the path number keeps track of which path number
- // the program is on.
- long getIncrement() const;
- void setIncrement(long increment);
-
- // Get/set whether the edge has been instrumented.
- bool hasInstrumentation();
- void setHasInstrumentation(bool hasInstrumentation);
-
- // Returns the successor number of this edge in the source.
- unsigned getSuccessorNumber();
-
-private:
- // The increment that the code will be instrumented with.
- long long _increment;
-
- // Whether this edge is in the spanning tree.
- bool _isInSpanningTree;
-
- // Whether this edge is an initialization of the path number.
- bool _isInitialization;
-
- // Whether this edge is a path counter increment.
- bool _isCounterIncrement;
-
- // Whether this edge has been instrumented.
- bool _hasInstrumentation;
-};
-
-// ---------------------------------------------------------------------------
-// BLInstrumentationDag extends BallLarusDag with algorithms that
-// determine where instrumentation should be placed.
-// ---------------------------------------------------------------------------
-class BLInstrumentationDag : public BallLarusDag {
-public:
- BLInstrumentationDag(Function &F);
-
- // Returns the Exit->Root edge. This edge is required for creating
- // directed cycles in the algorithm for moving instrumentation off of
- // the spanning tree
- BallLarusEdge* getExitRootEdge();
-
- // Returns an array of phony edges which mark those nodes
- // with function calls
- BLEdgeVector getCallPhonyEdges();
-
- // Gets/sets the path counter array
- GlobalVariable* getCounterArray();
- void setCounterArray(GlobalVariable* c);
-
- // Calculates the increments for the chords, thereby removing
- // instrumentation from the spanning tree edges. Implementation is based
- // on the algorithm in Figure 4 of [Ball94]
- void calculateChordIncrements();
-
- // Updates the state when an edge has been split
- void splitUpdate(BLInstrumentationEdge* formerEdge, BasicBlock* newBlock);
-
- // Calculates a spanning tree of the DAG ignoring cycles. Whichever
- // edges are in the spanning tree will not be instrumented, but this
- // implementation does not try to minimize the instrumentation overhead
- // by trying to find hot edges.
- void calculateSpanningTree();
-
- // Pushes initialization further down in order to group the first
- // increment and initialization.
- void pushInitialization();
-
- // Pushes the path counter increments up in order to group the last path
- // number increment.
- void pushCounters();
-
- // Removes phony edges from the successor list of the source, and the
- // predecessor list of the target.
- void unlinkPhony();
-
- // Generate dot graph for the function
- void generateDotGraph();
-
-protected:
- // BLInstrumentationDag creates BLInstrumentationNode objects in this
- // method overriding the creation of BallLarusNode objects.
- //
- // Allows subclasses to determine which type of Node is created.
- // Override this method to produce subclasses of BallLarusNode if
- // necessary.
- virtual BallLarusNode* createNode(BasicBlock* BB);
-
- // BLInstrumentationDag creates BLInstrumentationEdges.
- //
- // Allows subclasses to determine which type of Edge is created.
- // Override this method to produce subclasses of BallLarusEdge if
- // necessary. Parameters source and target will have been created by
- // createNode and can be cast to the subclass of BallLarusNode*
- // returned by createNode.
- virtual BallLarusEdge* createEdge(
- BallLarusNode* source, BallLarusNode* target, unsigned edgeNumber);
-
-private:
- BLEdgeVector _treeEdges; // All edges in the spanning tree.
- BLEdgeVector _chordEdges; // All edges not in the spanning tree.
- GlobalVariable* _counterArray; // Array to store path counters
-
- // Removes the edge from the appropriate predecessor and successor lists.
- void unlinkEdge(BallLarusEdge* edge);
-
- // Makes an edge part of the spanning tree.
- void makeEdgeSpanning(BLInstrumentationEdge* edge);
-
- // Pushes initialization and calls itself recursively.
- void pushInitializationFromEdge(BLInstrumentationEdge* edge);
-
- // Pushes path counter increments up recursively.
- void pushCountersFromEdge(BLInstrumentationEdge* edge);
-
- // Depth-first algorithm for determining the chord increments.
- void calculateChordIncrementsDfs(
- long weight, BallLarusNode* v, BallLarusEdge* e);
-
- // Determines the relative direction of two edges.
- int calculateChordIncrementsDir(BallLarusEdge* e, BallLarusEdge* f);
-};
-
-// ---------------------------------------------------------------------------
-// PathProfiler is a module pass which instruments path profiling instructions
-// ---------------------------------------------------------------------------
-class PathProfiler : public ModulePass {
-private:
- // Current context for multi threading support.
- LLVMContext* Context;
-
- // Which function are we currently instrumenting
- unsigned currentFunctionNumber;
-
- // The function prototype in the profiling runtime for incrementing a
- // single path counter in a hash table.
- Constant* llvmIncrementHashFunction;
- Constant* llvmDecrementHashFunction;
-
- // Instruments each function with path profiling. 'main' is instrumented
- // with code to save the profile to disk.
- bool runOnModule(Module &M);
-
- // Analyzes the function for Ball-Larus path profiling, and inserts code.
- void runOnFunction(std::vector<Constant*> &ftInit, Function &F, Module &M);
-
- // Creates an increment constant representing incr.
- ConstantInt* createIncrementConstant(long incr, int bitsize);
-
- // Creates an increment constant representing the value in
- // edge->getIncrement().
- ConstantInt* createIncrementConstant(BLInstrumentationEdge* edge);
-
- // Finds the insertion point after pathNumber in block. PathNumber may
- // be NULL.
- BasicBlock::iterator getInsertionPoint(
- BasicBlock* block, Value* pathNumber);
-
- // Inserts source's pathNumber Value* into target. Target may or may not
- // have multiple predecessors, and may or may not have its phiNode
- // initialized.
- void pushValueIntoNode(
- BLInstrumentationNode* source, BLInstrumentationNode* target);
-
- // Inserts source's pathNumber Value* into the appropriate slot of
- // target's phiNode.
- void pushValueIntoPHI(
- BLInstrumentationNode* target, BLInstrumentationNode* source);
-
- // The Value* in node, oldVal, is updated with a Value* corresponding to
- // oldVal + addition.
- void insertNumberIncrement(BLInstrumentationNode* node, Value* addition,
- bool atBeginning);
-
- // Creates a counter increment in the given node. The Value* in node is
- // taken as the index into a hash table.
- void insertCounterIncrement(
- Value* incValue,
- BasicBlock::iterator insertPoint,
- BLInstrumentationDag* dag,
- bool increment = true);
-
- // A PHINode is created in the node, and its values initialized to -1U.
- void preparePHI(BLInstrumentationNode* node);
-
- // Inserts instrumentation for the given edge
- //
- // Pre: The edge's source node has pathNumber set if the edge has a
- // non-zero path number increment.
- //
- // Post: Edge's target node has a pathNumber set to the path number Value
- // corresponding to the value of the path register after edge's
- // execution.
- void insertInstrumentationStartingAt(
- BLInstrumentationEdge* edge,
- BLInstrumentationDag* dag);
-
- // If this edge is a critical edge, then inserts a node at this edge.
- // This edge becomes the first edge, and a new BallLarusEdge is created.
- bool splitCritical(BLInstrumentationEdge* edge, BLInstrumentationDag* dag);
-
- // Inserts instrumentation according to the marked edges in dag. Phony
- // edges must be unlinked from the DAG, but accessible from the
- // backedges. Dag must have initializations, path number increments, and
- // counter increments present.
- //
- // Counter storage is created here.
- void insertInstrumentation( BLInstrumentationDag& dag, Module &M);
-
-public:
- static char ID; // Pass identification, replacement for typeid
- PathProfiler() : ModulePass(ID) {
- initializePathProfilerPass(*PassRegistry::getPassRegistry());
- }
-
- virtual const char *getPassName() const {
- return "Path Profiler";
- }
-};
-} // end anonymous namespace
-
-// Should we print the dot-graphs
-static cl::opt<bool> DotPathDag("path-profile-pathdag", cl::Hidden,
- cl::desc("Output the path profiling DAG for each function."));
-
-// Register the path profiler as a pass
-char PathProfiler::ID = 0;
-INITIALIZE_PASS(PathProfiler, "insert-path-profiling",
- "Insert instrumentation for Ball-Larus path profiling",
- false, false)
-
-ModulePass *llvm::createPathProfilerPass() { return new PathProfiler(); }
-
-namespace llvm {
- class PathProfilingFunctionTable {};
-
- // Type for global array storing references to hashes or arrays
- template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable,
- xcompile> {
- public:
- static StructType *get(LLVMContext& C) {
- return( StructType::get(
- TypeBuilder<types::i<32>, xcompile>::get(C), // type
- TypeBuilder<types::i<32>, xcompile>::get(C), // array size
- TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr
- NULL));
- }
- };
-
- typedef TypeBuilder<PathProfilingFunctionTable, true>
- ftEntryTypeBuilder;
-
- // BallLarusEdge << operator overloading
- raw_ostream& operator<<(raw_ostream& os,
- const BLInstrumentationEdge& edge)
- LLVM_ATTRIBUTE_USED;
- raw_ostream& operator<<(raw_ostream& os,
- const BLInstrumentationEdge& edge) {
- os << "[" << edge.getSource()->getName() << " -> "
- << edge.getTarget()->getName() << "] init: "
- << (edge.isInitialization() ? "yes" : "no")
- << " incr:" << edge.getIncrement() << " cinc: "
- << (edge.isCounterIncrement() ? "yes" : "no");
- return(os);
- }
-}
-
-// Creates a new BLInstrumentationNode from a BasicBlock.
-BLInstrumentationNode::BLInstrumentationNode(BasicBlock* BB) :
- BallLarusNode(BB),
- _startingPathNumber(NULL), _endingPathNumber(NULL), _pathPHI(NULL) {}
-
-// Constructor for BLInstrumentationEdge.
-BLInstrumentationEdge::BLInstrumentationEdge(BLInstrumentationNode* source,
- BLInstrumentationNode* target)
- : BallLarusEdge(source, target, 0),
- _increment(0), _isInSpanningTree(false), _isInitialization(false),
- _isCounterIncrement(false), _hasInstrumentation(false) {}
-
-// Sets the target node of this edge. Required to split edges.
-void BLInstrumentationEdge::setTarget(BallLarusNode* node) {
- _target = node;
-}
-
-// Returns whether this edge is in the spanning tree.
-bool BLInstrumentationEdge::isInSpanningTree() const {
- return(_isInSpanningTree);
-}
-
-// Sets whether this edge is in the spanning tree.
-void BLInstrumentationEdge::setIsInSpanningTree(bool isInSpanningTree) {
- _isInSpanningTree = isInSpanningTree;
-}
-
-// Returns whether this edge will be instrumented with a path number
-// initialization.
-bool BLInstrumentationEdge::isInitialization() const {
- return(_isInitialization);
-}
-
-// Sets whether this edge will be instrumented with a path number
-// initialization.
-void BLInstrumentationEdge::setIsInitialization(bool isInitialization) {
- _isInitialization = isInitialization;
-}
-
-// Returns whether this edge will be instrumented with a path counter
-// increment. Notice this is incrementing the path counter
-// corresponding to the path number register. The path number
-// increment is determined by getIncrement().
-bool BLInstrumentationEdge::isCounterIncrement() const {
- return(_isCounterIncrement);
-}
-
-// Sets whether this edge will be instrumented with a path counter
-// increment.
-void BLInstrumentationEdge::setIsCounterIncrement(bool isCounterIncrement) {
- _isCounterIncrement = isCounterIncrement;
-}
-
-// Gets the path number increment that this edge will be instrumented
-// with. This is distinct from the path counter increment and the
-// weight. The counter increment counts the number of executions of
-// some path, whereas the path number keeps track of which path number
-// the program is on.
-long BLInstrumentationEdge::getIncrement() const {
- return(_increment);
-}
-
-// Set whether this edge will be instrumented with a path number
-// increment.
-void BLInstrumentationEdge::setIncrement(long increment) {
- _increment = increment;
-}
-
-// True iff the edge has already been instrumented.
-bool BLInstrumentationEdge::hasInstrumentation() {
- return(_hasInstrumentation);
-}
-
-// Set whether this edge has been instrumented.
-void BLInstrumentationEdge::setHasInstrumentation(bool hasInstrumentation) {
- _hasInstrumentation = hasInstrumentation;
-}
-
-// Returns the successor number of this edge in the source.
-unsigned BLInstrumentationEdge::getSuccessorNumber() {
- BallLarusNode* sourceNode = getSource();
- BallLarusNode* targetNode = getTarget();
- BasicBlock* source = sourceNode->getBlock();
- BasicBlock* target = targetNode->getBlock();
-
- if(source == NULL || target == NULL)
- return(0);
-
- TerminatorInst* terminator = source->getTerminator();
-
- unsigned i;
- for(i=0; i < terminator->getNumSuccessors(); i++) {
- if(terminator->getSuccessor(i) == target)
- break;
- }
-
- return(i);
-}
-
-// BLInstrumentationDag constructor initializes a DAG for the given Function.
-BLInstrumentationDag::BLInstrumentationDag(Function &F) : BallLarusDag(F),
- _counterArray(0) {
-}
-
-// Returns the Exit->Root edge. This edge is required for creating
-// directed cycles in the algorithm for moving instrumentation off of
-// the spanning tree
-BallLarusEdge* BLInstrumentationDag::getExitRootEdge() {
- BLEdgeIterator erEdge = getExit()->succBegin();
- return(*erEdge);
-}
-
-BLEdgeVector BLInstrumentationDag::getCallPhonyEdges () {
- BLEdgeVector callEdges;
-
- for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
- edge != end; edge++ ) {
- if( (*edge)->getType() == BallLarusEdge::CALLEDGE_PHONY )
- callEdges.push_back(*edge);
- }
-
- return callEdges;
-}
-
-// Gets the path counter array
-GlobalVariable* BLInstrumentationDag::getCounterArray() {
- return _counterArray;
-}
-
-void BLInstrumentationDag::setCounterArray(GlobalVariable* c) {
- _counterArray = c;
-}
-
-// Calculates the increment for the chords, thereby removing
-// instrumentation from the spanning tree edges. Implementation is based on
-// the algorithm in Figure 4 of [Ball94]
-void BLInstrumentationDag::calculateChordIncrements() {
- calculateChordIncrementsDfs(0, getRoot(), NULL);
-
- BLInstrumentationEdge* chord;
- for(BLEdgeIterator chordEdge = _chordEdges.begin(),
- end = _chordEdges.end(); chordEdge != end; chordEdge++) {
- chord = (BLInstrumentationEdge*) *chordEdge;
- chord->setIncrement(chord->getIncrement() + chord->getWeight());
- }
-}
-
-// Updates the state when an edge has been split
-void BLInstrumentationDag::splitUpdate(BLInstrumentationEdge* formerEdge,
- BasicBlock* newBlock) {
- BallLarusNode* oldTarget = formerEdge->getTarget();
- BallLarusNode* newNode = addNode(newBlock);
- formerEdge->setTarget(newNode);
- newNode->addPredEdge(formerEdge);
-
- DEBUG(dbgs() << " Edge split: " << *formerEdge << "\n");
-
- oldTarget->removePredEdge(formerEdge);
- BallLarusEdge* newEdge = addEdge(newNode, oldTarget,0);
-
- if( formerEdge->getType() == BallLarusEdge::BACKEDGE ||
- formerEdge->getType() == BallLarusEdge::SPLITEDGE) {
- newEdge->setType(formerEdge->getType());
- newEdge->setPhonyRoot(formerEdge->getPhonyRoot());
- newEdge->setPhonyExit(formerEdge->getPhonyExit());
- formerEdge->setType(BallLarusEdge::NORMAL);
- formerEdge->setPhonyRoot(NULL);
- formerEdge->setPhonyExit(NULL);
- }
-}
-
-// Calculates a spanning tree of the DAG ignoring cycles. Whichever
-// edges are in the spanning tree will not be instrumented, but this
-// implementation does not try to minimize the instrumentation overhead
-// by trying to find hot edges.
-void BLInstrumentationDag::calculateSpanningTree() {
- std::stack<BallLarusNode*> dfsStack;
-
- for(BLNodeIterator nodeIt = _nodes.begin(), end = _nodes.end();
- nodeIt != end; nodeIt++) {
- (*nodeIt)->setColor(BallLarusNode::WHITE);
- }
-
- dfsStack.push(getRoot());
- while(dfsStack.size() > 0) {
- BallLarusNode* node = dfsStack.top();
- dfsStack.pop();
-
- if(node->getColor() == BallLarusNode::WHITE)
- continue;
-
- BallLarusNode* nextNode;
- bool forward = true;
- BLEdgeIterator succEnd = node->succEnd();
-
- node->setColor(BallLarusNode::WHITE);
- // first iterate over successors then predecessors
- for(BLEdgeIterator edge = node->succBegin(), predEnd = node->predEnd();
- edge != predEnd; edge++) {
- if(edge == succEnd) {
- edge = node->predBegin();
- forward = false;
- }
-
- // Ignore split edges
- if ((*edge)->getType() == BallLarusEdge::SPLITEDGE)
- continue;
-
- nextNode = forward? (*edge)->getTarget(): (*edge)->getSource();
- if(nextNode->getColor() != BallLarusNode::WHITE) {
- nextNode->setColor(BallLarusNode::WHITE);
- makeEdgeSpanning((BLInstrumentationEdge*)(*edge));
- }
- }
- }
-
- for(BLEdgeIterator edge = _edges.begin(), end = _edges.end();
- edge != end; edge++) {
- BLInstrumentationEdge* instEdge = (BLInstrumentationEdge*) (*edge);
- // safe since createEdge is overridden
- if(!instEdge->isInSpanningTree() && (*edge)->getType()
- != BallLarusEdge::SPLITEDGE)
- _chordEdges.push_back(instEdge);
- }
-}
-
-// Pushes initialization further down in order to group the first
-// increment and initialization.
-void BLInstrumentationDag::pushInitialization() {
- BLInstrumentationEdge* exitRootEdge =
- (BLInstrumentationEdge*) getExitRootEdge();
- exitRootEdge->setIsInitialization(true);
- pushInitializationFromEdge(exitRootEdge);
-}
-
-// Pushes the path counter increments up in order to group the last path
-// number increment.
-void BLInstrumentationDag::pushCounters() {
- BLInstrumentationEdge* exitRootEdge =
- (BLInstrumentationEdge*) getExitRootEdge();
- exitRootEdge->setIsCounterIncrement(true);
- pushCountersFromEdge(exitRootEdge);
-}
-
-// Removes phony edges from the successor list of the source, and the
-// predecessor list of the target.
-void BLInstrumentationDag::unlinkPhony() {
- BallLarusEdge* edge;
-
- for(BLEdgeIterator next = _edges.begin(),
- end = _edges.end(); next != end; next++) {
- edge = (*next);
-
- if( edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
- edge->getType() == BallLarusEdge::SPLITEDGE_PHONY ||
- edge->getType() == BallLarusEdge::CALLEDGE_PHONY ) {
- unlinkEdge(edge);
- }
- }
-}
-
-// Generate a .dot graph to represent the DAG and pathNumbers
-void BLInstrumentationDag::generateDotGraph() {
- std::string errorInfo;
- std::string functionName = getFunction().getName().str();
- std::string filename = "pathdag." + functionName + ".dot";
-
- DEBUG (dbgs() << "Writing '" << filename << "'...\n");
- raw_fd_ostream dotFile(filename.c_str(), errorInfo);
-
- if (!errorInfo.empty()) {
- errs() << "Error opening '" << filename.c_str() <<"' for writing!";
- errs() << "\n";
- return;
- }
-
- dotFile << "digraph " << functionName << " {\n";
-
- for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
- edge != end; edge++) {
- std::string sourceName = (*edge)->getSource()->getName();
- std::string targetName = (*edge)->getTarget()->getName();
-
- dotFile << "\t\"" << sourceName.c_str() << "\" -> \""
- << targetName.c_str() << "\" ";
-
- long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
-
- switch( (*edge)->getType() ) {
- case BallLarusEdge::NORMAL:
- dotFile << "[label=" << inc << "] [color=black];\n";
- break;
-
- case BallLarusEdge::BACKEDGE:
- dotFile << "[color=cyan];\n";
- break;
-
- case BallLarusEdge::BACKEDGE_PHONY:
- dotFile << "[label=" << inc
- << "] [color=blue];\n";
- break;
-
- case BallLarusEdge::SPLITEDGE:
- dotFile << "[color=violet];\n";
- break;
-
- case BallLarusEdge::SPLITEDGE_PHONY:
- dotFile << "[label=" << inc << "] [color=red];\n";
- break;
-
- case BallLarusEdge::CALLEDGE_PHONY:
- dotFile << "[label=" << inc << "] [color=green];\n";
- break;
- }
- }
-
- dotFile << "}\n";
-}
-
-// Allows subclasses to determine which type of Node is created.
-// Override this method to produce subclasses of BallLarusNode if
-// necessary. The destructor of BallLarusDag will call free on each pointer
-// created.
-BallLarusNode* BLInstrumentationDag::createNode(BasicBlock* BB) {
- return( new BLInstrumentationNode(BB) );
-}
-
-// Allows subclasses to determine which type of Edge is created.
-// Override this method to produce subclasses of BallLarusEdge if
-// necessary. The destructor of BallLarusDag will call free on each pointer
-// created.
-BallLarusEdge* BLInstrumentationDag::createEdge(BallLarusNode* source,
- BallLarusNode* target, unsigned edgeNumber) {
- // One can cast from BallLarusNode to BLInstrumentationNode since createNode
- // is overridden to produce BLInstrumentationNode.
- return( new BLInstrumentationEdge((BLInstrumentationNode*)source,
- (BLInstrumentationNode*)target) );
-}
-
-// Gets the Value corresponding to the pathNumber register, constant,
-// or phinode. Used by the instrumentation code to remember path
-// number Values.
-Value* BLInstrumentationNode::getStartingPathNumber(){
- return(_startingPathNumber);
-}
-
-// Sets the Value of the pathNumber. Used by the instrumentation code.
-void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) {
- DEBUG(dbgs() << " SPN-" << getName() << " <-- " << (pathNumber ?
- pathNumber->getName() :
- "unused") << "\n");
- _startingPathNumber = pathNumber;
-}
-
-Value* BLInstrumentationNode::getEndingPathNumber(){
- return(_endingPathNumber);
-}
-
-void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) {
- DEBUG(dbgs() << " EPN-" << getName() << " <-- "
- << (pathNumber ? pathNumber->getName() : "unused") << "\n");
- _endingPathNumber = pathNumber;
-}
-
-// Get the PHINode Instruction for this node. Used by instrumentation
-// code.
-PHINode* BLInstrumentationNode::getPathPHI() {
- return(_pathPHI);
-}
-
-// Set the PHINode Instruction for this node. Used by instrumentation
-// code.
-void BLInstrumentationNode::setPathPHI(PHINode* pathPHI) {
- _pathPHI = pathPHI;
-}
-
-// Removes the edge from the appropriate predecessor and successor
-// lists.
-void BLInstrumentationDag::unlinkEdge(BallLarusEdge* edge) {
- if(edge == getExitRootEdge())
- DEBUG(dbgs() << " Removing exit->root edge\n");
-
- edge->getSource()->removeSuccEdge(edge);
- edge->getTarget()->removePredEdge(edge);
-}
-
-// Makes an edge part of the spanning tree.
-void BLInstrumentationDag::makeEdgeSpanning(BLInstrumentationEdge* edge) {
- edge->setIsInSpanningTree(true);
- _treeEdges.push_back(edge);
-}
-
-// Pushes initialization and calls itself recursively.
-void BLInstrumentationDag::pushInitializationFromEdge(
- BLInstrumentationEdge* edge) {
- BallLarusNode* target;
-
- target = edge->getTarget();
- if( target->getNumberPredEdges() > 1 || target == getExit() ) {
- return;
- } else {
- for(BLEdgeIterator next = target->succBegin(),
- end = target->succEnd(); next != end; next++) {
- BLInstrumentationEdge* intoEdge = (BLInstrumentationEdge*) *next;
-
- // Skip split edges
- if (intoEdge->getType() == BallLarusEdge::SPLITEDGE)
- continue;
-
- intoEdge->setIncrement(intoEdge->getIncrement() +
- edge->getIncrement());
- intoEdge->setIsInitialization(true);
- pushInitializationFromEdge(intoEdge);
- }
-
- edge->setIncrement(0);
- edge->setIsInitialization(false);
- }
-}
-
-// Pushes path counter increments up recursively.
-void BLInstrumentationDag::pushCountersFromEdge(BLInstrumentationEdge* edge) {
- BallLarusNode* source;
-
- source = edge->getSource();
- if(source->getNumberSuccEdges() > 1 || source == getRoot()
- || edge->isInitialization()) {
- return;
- } else {
- for(BLEdgeIterator previous = source->predBegin(),
- end = source->predEnd(); previous != end; previous++) {
- BLInstrumentationEdge* fromEdge = (BLInstrumentationEdge*) *previous;
-
- // Skip split edges
- if (fromEdge->getType() == BallLarusEdge::SPLITEDGE)
- continue;
-
- fromEdge->setIncrement(fromEdge->getIncrement() +
- edge->getIncrement());
- fromEdge->setIsCounterIncrement(true);
- pushCountersFromEdge(fromEdge);
- }
-
- edge->setIncrement(0);
- edge->setIsCounterIncrement(false);
- }
-}
-
-// Depth first algorithm for determining the chord increments.
-void BLInstrumentationDag::calculateChordIncrementsDfs(long weight,
- BallLarusNode* v, BallLarusEdge* e) {
- BLInstrumentationEdge* f;
-
- for(BLEdgeIterator treeEdge = _treeEdges.begin(),
- end = _treeEdges.end(); treeEdge != end; treeEdge++) {
- f = (BLInstrumentationEdge*) *treeEdge;
- if(e != f && v == f->getTarget()) {
- calculateChordIncrementsDfs(
- calculateChordIncrementsDir(e,f)*(weight) +
- f->getWeight(), f->getSource(), f);
- }
- if(e != f && v == f->getSource()) {
- calculateChordIncrementsDfs(
- calculateChordIncrementsDir(e,f)*(weight) +
- f->getWeight(), f->getTarget(), f);
- }
- }
-
- for(BLEdgeIterator chordEdge = _chordEdges.begin(),
- end = _chordEdges.end(); chordEdge != end; chordEdge++) {
- f = (BLInstrumentationEdge*) *chordEdge;
- if(v == f->getSource() || v == f->getTarget()) {
- f->setIncrement(f->getIncrement() +
- calculateChordIncrementsDir(e,f)*weight);
- }
- }
-}
-
-// Determines the relative direction of two edges.
-int BLInstrumentationDag::calculateChordIncrementsDir(BallLarusEdge* e,
- BallLarusEdge* f) {
- if( e == NULL)
- return(1);
- else if(e->getSource() == f->getTarget()
- || e->getTarget() == f->getSource())
- return(1);
-
- return(-1);
-}
-
-// Creates an increment constant representing incr.
-ConstantInt* PathProfiler::createIncrementConstant(long incr,
- int bitsize) {
- return(ConstantInt::get(IntegerType::get(*Context, 32), incr));
-}
-
-// Creates an increment constant representing the value in
-// edge->getIncrement().
-ConstantInt* PathProfiler::createIncrementConstant(
- BLInstrumentationEdge* edge) {
- return(createIncrementConstant(edge->getIncrement(), 32));
-}
-
-// Finds the insertion point after pathNumber in block. PathNumber may
-// be NULL.
-BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
- pathNumber) {
- if(pathNumber == NULL || isa<ConstantInt>(pathNumber)
- || (((Instruction*)(pathNumber))->getParent()) != block) {
- return(block->getFirstInsertionPt());
- } else {
- Instruction* pathNumberInst = (Instruction*) (pathNumber);
- BasicBlock::iterator insertPoint;
- BasicBlock::iterator end = block->end();
-
- for(insertPoint = block->begin();
- insertPoint != end; insertPoint++) {
- Instruction* insertInst = &(*insertPoint);
-
- if(insertInst == pathNumberInst)
- return(++insertPoint);
- }
-
- return(insertPoint);
- }
-}
-
-// A PHINode is created in the node, and its values initialized to -1U.
-void PathProfiler::preparePHI(BLInstrumentationNode* node) {
- BasicBlock* block = node->getBlock();
- BasicBlock::iterator insertPoint = block->getFirstInsertionPt();
- pred_iterator PB = pred_begin(node->getBlock()),
- PE = pred_end(node->getBlock());
- PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context),
- std::distance(PB, PE), "pathNumber",
- insertPoint );
- node->setPathPHI(phi);
- node->setStartingPathNumber(phi);
- node->setEndingPathNumber(phi);
-
- for(pred_iterator predIt = PB; predIt != PE; predIt++) {
- BasicBlock* pred = (*predIt);
-
- if(pred != NULL)
- phi->addIncoming(createIncrementConstant((long)-1, 32), pred);
- }
-}
-
-// Inserts source's pathNumber Value* into target. Target may or may not
-// have multiple predecessors, and may or may not have its phiNode
-// initialized.
-void PathProfiler::pushValueIntoNode(BLInstrumentationNode* source,
- BLInstrumentationNode* target) {
- if(target->getBlock() == NULL)
- return;
-
- if(target->getNumberPredEdges() <= 1) {
- assert(target->getStartingPathNumber() == NULL &&
- "Target already has path number");
- target->setStartingPathNumber(source->getEndingPathNumber());
- target->setEndingPathNumber(source->getEndingPathNumber());
- DEBUG(dbgs() << " Passing path number"
- << (source->getEndingPathNumber() ? "" : " (null)")
- << " value through.\n");
- } else {
- if(target->getPathPHI() == NULL) {
- DEBUG(dbgs() << " Initializing PHI node for block '"
- << target->getName() << "'\n");
- preparePHI(target);
- }
- pushValueIntoPHI(target, source);
- DEBUG(dbgs() << " Passing number value into PHI for block '"
- << target->getName() << "'\n");
- }
-}
-
-// Inserts source's pathNumber Value* into the appropriate slot of
-// target's phiNode.
-void PathProfiler::pushValueIntoPHI(BLInstrumentationNode* target,
- BLInstrumentationNode* source) {
- PHINode* phi = target->getPathPHI();
- assert(phi != NULL && " Tried to push value into node with PHI, but node"
- " actually had no PHI.");
- phi->removeIncomingValue(source->getBlock(), false);
- phi->addIncoming(source->getEndingPathNumber(), source->getBlock());
-}
-
-// The Value* in node, oldVal, is updated with a Value* corresponding to
-// oldVal + addition.
-void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node,
- Value* addition, bool atBeginning) {
- BasicBlock* block = node->getBlock();
- assert(node->getStartingPathNumber() != NULL);
- assert(node->getEndingPathNumber() != NULL);
-
- BasicBlock::iterator insertPoint;
-
- if( atBeginning )
- insertPoint = block->getFirstInsertionPt();
- else
- insertPoint = block->getTerminator();
-
- DEBUG(errs() << " Creating addition instruction.\n");
- Value* newpn = BinaryOperator::Create(Instruction::Add,
- node->getStartingPathNumber(),
- addition, "pathNumber", insertPoint);
-
- node->setEndingPathNumber(newpn);
-
- if( atBeginning )
- node->setStartingPathNumber(newpn);
-}
-
-// Creates a counter increment in the given node. The Value* in node is
-// taken as the index into an array or hash table. The hash table access
-// is a call to the runtime.
-void PathProfiler::insertCounterIncrement(Value* incValue,
- BasicBlock::iterator insertPoint,
- BLInstrumentationDag* dag,
- bool increment) {
- // Counter increment for array
- if( dag->getNumberOfPaths() <= HASH_THRESHHOLD ) {
- // Get pointer to the array location
- std::vector<Value*> gepIndices(2);
- gepIndices[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
- gepIndices[1] = incValue;
-
- GetElementPtrInst* pcPointer =
- GetElementPtrInst::Create(dag->getCounterArray(), gepIndices,
- "counterInc", insertPoint);
-
- // Load from the array - call it oldPC
- LoadInst* oldPc = new LoadInst(pcPointer, "oldPC", insertPoint);
-
- // Test to see whether adding 1 will overflow the counter
- ICmpInst* isMax = new ICmpInst(insertPoint, CmpInst::ICMP_ULT, oldPc,
- createIncrementConstant(0xffffffff, 32),
- "isMax");
-
- // Select increment for the path counter based on overflow
- SelectInst* inc =
- SelectInst::Create( isMax, createIncrementConstant(increment?1:-1,32),
- createIncrementConstant(0,32),
- "pathInc", insertPoint);
-
- // newPc = oldPc + inc
- BinaryOperator* newPc = BinaryOperator::Create(Instruction::Add,
- oldPc, inc, "newPC",
- insertPoint);
-
- // Store back in to the array
- new StoreInst(newPc, pcPointer, insertPoint);
- } else { // Counter increment for hash
- std::vector<Value*> args(2);
- args[0] = ConstantInt::get(Type::getInt32Ty(*Context),
- currentFunctionNumber);
- args[1] = incValue;
-
- CallInst::Create(
- increment ? llvmIncrementHashFunction : llvmDecrementHashFunction,
- args, "", insertPoint);
- }
-}
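A minimal C++ model of the saturating array update emitted above — illustrative only, since the pass emits IR rather than C++, and bumpPathCounter is a hypothetical name:

    #include <cstdint>

    // Mirrors the icmp/select/add/store sequence built above: the counter
    // sticks at UINT32_MAX instead of wrapping, and a decrement adds -1.
    static inline void bumpPathCounter(uint32_t &counter, bool increment) {
      uint32_t delta = 0;
      if (counter < 0xffffffffu)                 // the "isMax" compare
        delta = increment ? 1u : (uint32_t)-1;   // the "pathInc" select
      counter += delta;                          // newPC = oldPC + inc
    }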
-
-// Inserts instrumentation for the given edge
-//
- // Pre: The edge's source node has pathNumber set if the edge has a
- // non-zero path number increment.
-//
-// Post: Edge's target node has a pathNumber set to the path number Value
-// corresponding to the value of the path register after edge's
-// execution.
-//
-// FIXME: This should be reworked so it's not recursive.
-void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge,
- BLInstrumentationDag* dag) {
- // Mark the edge as instrumented
- edge->setHasInstrumentation(true);
- DEBUG(dbgs() << "\nInstrumenting edge: " << (*edge) << "\n");
-
- // create a new node for this edge's instrumentation
- splitCritical(edge, dag);
-
- BLInstrumentationNode* sourceNode = (BLInstrumentationNode*)edge->getSource();
- BLInstrumentationNode* targetNode = (BLInstrumentationNode*)edge->getTarget();
- BLInstrumentationNode* instrumentNode;
- BLInstrumentationNode* nextSourceNode;
-
- bool atBeginning = false;
-
- // Source node has only 1 successor so any information can be simply
- // inserted in to it without splitting
- if( sourceNode->getBlock() && sourceNode->getNumberSuccEdges() <= 1) {
- DEBUG(dbgs() << " Potential instructions to be placed in: "
- << sourceNode->getName() << " (at end)\n");
- instrumentNode = sourceNode;
- nextSourceNode = targetNode; // ... since we never made any new nodes
- }
-
- // The target node only has one predecessor, so we can safely insert edge
- // instrumentation into it. If there was splitting, it must have been
- // successful.
- else if( targetNode->getNumberPredEdges() == 1 ) {
- DEBUG(dbgs() << " Potential instructions to be placed in: "
- << targetNode->getName() << " (at beginning)\n");
- pushValueIntoNode(sourceNode, targetNode);
- instrumentNode = targetNode;
- nextSourceNode = NULL; // ... otherwise we'll just keep splitting
- atBeginning = true;
- }
-
- // Somehow, splitting must have failed.
- else {
- errs() << "Instrumenting could not split a critical edge.\n";
- DEBUG(dbgs() << " Couldn't split edge " << (*edge) << ".\n");
- return;
- }
-
- // Insert instrumentation if this is a back or split edge
- if( edge->getType() == BallLarusEdge::BACKEDGE ||
- edge->getType() == BallLarusEdge::SPLITEDGE ) {
- BLInstrumentationEdge* top =
- (BLInstrumentationEdge*) edge->getPhonyRoot();
- BLInstrumentationEdge* bottom =
- (BLInstrumentationEdge*) edge->getPhonyExit();
-
- assert( top->isInitialization() && " Top phony edge did not"
- " contain a path number initialization.");
- assert( bottom->isCounterIncrement() && " Bottom phony edge"
- " did not contain a path counter increment.");
-
- // split edge has yet to be initialized
- if( !instrumentNode->getEndingPathNumber() ) {
- instrumentNode->setStartingPathNumber(createIncrementConstant(0,32));
- instrumentNode->setEndingPathNumber(createIncrementConstant(0,32));
- }
-
- BasicBlock::iterator insertPoint = atBeginning ?
- instrumentNode->getBlock()->getFirstInsertionPt() :
- instrumentNode->getBlock()->getTerminator();
-
- // add information from the bottom edge, if it exists
- if( bottom->getIncrement() ) {
- Value* newpn =
- BinaryOperator::Create(Instruction::Add,
- instrumentNode->getStartingPathNumber(),
- createIncrementConstant(bottom),
- "pathNumber", insertPoint);
- instrumentNode->setEndingPathNumber(newpn);
- }
-
- insertCounterIncrement(instrumentNode->getEndingPathNumber(),
- insertPoint, dag);
-
- if( atBeginning )
- instrumentNode->setStartingPathNumber(createIncrementConstant(top));
-
- instrumentNode->setEndingPathNumber(createIncrementConstant(top));
-
- // Check for path counter increments
- if( top->isCounterIncrement() ) {
- insertCounterIncrement(instrumentNode->getEndingPathNumber(),
- instrumentNode->getBlock()->getTerminator(),dag);
- instrumentNode->setEndingPathNumber(0);
- }
- }
-
- // Insert instrumentation if this is a normal edge
- else {
- BasicBlock::iterator insertPoint = atBeginning ?
- instrumentNode->getBlock()->getFirstInsertionPt() :
- instrumentNode->getBlock()->getTerminator();
-
- if( edge->isInitialization() ) { // initialize path number
- instrumentNode->setEndingPathNumber(createIncrementConstant(edge));
- } else if( edge->getIncrement() ) {// increment path number
- Value* newpn =
- BinaryOperator::Create(Instruction::Add,
- instrumentNode->getStartingPathNumber(),
- createIncrementConstant(edge),
- "pathNumber", insertPoint);
- instrumentNode->setEndingPathNumber(newpn);
-
- if( atBeginning )
- instrumentNode->setStartingPathNumber(newpn);
- }
-
- // Check for path counter increments
- if( edge->isCounterIncrement() ) {
- insertCounterIncrement(instrumentNode->getEndingPathNumber(),
- insertPoint, dag);
- instrumentNode->setEndingPathNumber(0);
- }
- }
-
- // Push it along
- if (nextSourceNode && instrumentNode->getEndingPathNumber())
- pushValueIntoNode(instrumentNode, nextSourceNode);
-
- // Add all the successors
- for( BLEdgeIterator next = targetNode->succBegin(),
- end = targetNode->succEnd(); next != end; next++ ) {
- // So long as it is un-instrumented, add it to the list
- if( !((BLInstrumentationEdge*)(*next))->hasInstrumentation() )
- insertInstrumentationStartingAt((BLInstrumentationEdge*)*next,dag);
- else
- DEBUG(dbgs() << " Edge " << *(BLInstrumentationEdge*)(*next)
- << " already instrumented.\n");
- }
-}
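Per the FIXME above, the recursion could be replaced by an explicit worklist; a minimal sketch of that shape, assuming the per-edge body above is factored into a helper (instrumentOneEdge is a hypothetical name and must still call setHasInstrumentation(true)):

    SmallVector<BLInstrumentationEdge *, 16> Work;
    Work.push_back(exitRootEdge);
    while (!Work.empty()) {
      BLInstrumentationEdge *E = Work.pop_back_val();
      if (E->hasInstrumentation())
        continue;
      instrumentOneEdge(E, dag);  // the body above, minus the recursive call
      BLInstrumentationNode *T = (BLInstrumentationNode *)E->getTarget();
      for (BLEdgeIterator I = T->succBegin(), End = T->succEnd(); I != End; ++I)
        Work.push_back((BLInstrumentationEdge *)*I);
    }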
-
-// Inserts instrumentation according to the marked edges in dag. Phony edges
- // must be unlinked from the DAG, but remain accessible from the backedges.
- // The DAG must have initializations, path number increments, and counter
- // increments present.
-//
-// Counter storage is created here.
-void PathProfiler::insertInstrumentation(
- BLInstrumentationDag& dag, Module &M) {
-
- BLInstrumentationEdge* exitRootEdge =
- (BLInstrumentationEdge*) dag.getExitRootEdge();
- insertInstrumentationStartingAt(exitRootEdge, &dag);
-
- // Iterate through each call edge and apply the appropriate hash increment
- // and decrement functions
- BLEdgeVector callEdges = dag.getCallPhonyEdges();
- for( BLEdgeIterator edge = callEdges.begin(),
- end = callEdges.end(); edge != end; edge++ ) {
- BLInstrumentationNode* node =
- (BLInstrumentationNode*)(*edge)->getSource();
- BasicBlock::iterator insertPoint = node->getBlock()->getFirstInsertionPt();
-
- // Find the first function call
- while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call )
- insertPoint++;
-
- DEBUG(dbgs() << "\nInstrumenting method call block '"
- << node->getBlock()->getName() << "'\n");
- DEBUG(dbgs() << " Path number initialized: "
- << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n");
-
- Value* newpn;
- if( node->getStartingPathNumber() ) {
- long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
- if ( inc )
- newpn = BinaryOperator::Create(Instruction::Add,
- node->getStartingPathNumber(),
- createIncrementConstant(inc,32),
- "pathNumber", insertPoint);
- else
- newpn = node->getStartingPathNumber();
- } else {
- newpn = (Value*)createIncrementConstant(
- ((BLInstrumentationEdge*)(*edge))->getIncrement(), 32);
- }
-
- insertCounterIncrement(newpn, insertPoint, &dag);
- insertCounterIncrement(newpn, node->getBlock()->getTerminator(),
- &dag, false);
- }
-}
-
-// Entry point of the module
-void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit,
- Function &F, Module &M) {
- // Build DAG from CFG
- BLInstrumentationDag dag = BLInstrumentationDag(F);
- dag.init();
-
- // give each path a unique integer value
- dag.calculatePathNumbers();
-
- // modify path increments to increase the efficiency
- // of instrumentation
- dag.calculateSpanningTree();
- dag.calculateChordIncrements();
- dag.pushInitialization();
- dag.pushCounters();
- dag.unlinkPhony();
-
- // potentially generate .dot graph for the dag
- if (DotPathDag)
- dag.generateDotGraph ();
-
- // Should we store the information in an array or hash
- if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) {
- Type* t = ArrayType::get(Type::getInt32Ty(*Context),
- dag.getNumberOfPaths());
-
- dag.setCounterArray(new GlobalVariable(M, t, false,
- GlobalValue::InternalLinkage,
- Constant::getNullValue(t), ""));
- }
-
- insertInstrumentation(dag, M);
-
- // Add to global function reference table
- unsigned type;
- Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context);
-
- if( dag.getNumberOfPaths() <= HASH_THRESHHOLD )
- type = ProfilingArray;
- else
- type = ProfilingHash;
-
- std::vector<Constant*> entryArray(3);
- entryArray[0] = createIncrementConstant(type,32);
- entryArray[1] = createIncrementConstant(dag.getNumberOfPaths(),32);
- entryArray[2] = dag.getCounterArray() ?
- ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) :
- Constant::getNullValue(voidPtr);
-
- StructType* at = ftEntryTypeBuilder::get(*Context);
- ConstantStruct* functionEntry =
- (ConstantStruct*)ConstantStruct::get(at, entryArray);
- ftInit.push_back(functionEntry);
-}
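For reference, each entry pushed into ftInit above bundles three fields; an illustrative C++ mirror (FtEntry is a hypothetical name — the pass builds this as a ConstantStruct):

    #include <cstdint>

    struct FtEntry {
      uint32_t type;      // ProfilingArray or ProfilingHash
      uint32_t numPaths;  // dag.getNumberOfPaths()
      void    *counters;  // counter array bitcast to i8*, or null when hashing
    };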
-
- // Output the bitcode if we want to observe instrumentation changes
-#define PRINT_MODULE dbgs() << \
- "\n\n============= MODULE BEGIN ===============\n" << M << \
- "\n============== MODULE END ================\n"
-
-bool PathProfiler::runOnModule(Module &M) {
- Context = &M.getContext();
-
- DEBUG(dbgs()
- << "****************************************\n"
- << "****************************************\n"
- << "** **\n"
- << "** PATH PROFILING INSTRUMENTATION **\n"
- << "** **\n"
- << "****************************************\n"
- << "****************************************\n");
-
- // No main, no instrumentation!
- Function *Main = M.getFunction("main");
-
- // Using Fortran? ... this kind of works
- if (!Main)
- Main = M.getFunction("MAIN__");
-
- if (!Main) {
- errs() << "WARNING: cannot insert path profiling into a module"
- << " with no main function!\n";
- return false;
- }
-
- llvmIncrementHashFunction = M.getOrInsertFunction(
- "llvm_increment_path_count",
- Type::getVoidTy(*Context), // return type
- Type::getInt32Ty(*Context), // function number
- Type::getInt32Ty(*Context), // path number
- NULL );
-
- llvmDecrementHashFunction = M.getOrInsertFunction(
- "llvm_decrement_path_count",
- Type::getVoidTy(*Context), // return type
- Type::getInt32Ty(*Context), // function number
- Type::getInt32Ty(*Context), // path number
- NULL );
-
- std::vector<Constant*> ftInit;
- unsigned functionNumber = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
- if (F->isDeclaration())
- continue;
-
- DEBUG(dbgs() << "Function: " << F->getName() << "\n");
- functionNumber++;
-
- // set function number
- currentFunctionNumber = functionNumber;
- runOnFunction(ftInit, *F, M);
- }
-
- Type *t = ftEntryTypeBuilder::get(*Context);
- ArrayType* ftArrayType = ArrayType::get(t, ftInit.size());
- Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit);
-
- DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n");
-
- GlobalVariable* functionTable =
- new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage,
- ftInitConstant, "functionPathTable");
- Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0);
- InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable,
- PointerType::getUnqual(eltType));
-
- DEBUG(PRINT_MODULE);
-
- return true;
-}
-
- // If this edge is a critical edge, a node is inserted at this edge.
- // This edge becomes the first edge, and a new BallLarusEdge is created.
- // Returns true if the edge was split.
-bool PathProfiler::splitCritical(BLInstrumentationEdge* edge,
- BLInstrumentationDag* dag) {
- unsigned succNum = edge->getSuccessorNumber();
- BallLarusNode* sourceNode = edge->getSource();
- BallLarusNode* targetNode = edge->getTarget();
- BasicBlock* sourceBlock = sourceNode->getBlock();
- BasicBlock* targetBlock = targetNode->getBlock();
-
- if(sourceBlock == NULL || targetBlock == NULL
- || sourceNode->getNumberSuccEdges() <= 1
- || targetNode->getNumberPredEdges() == 1 ) {
- return(false);
- }
-
- TerminatorInst* terminator = sourceBlock->getTerminator();
-
- if( SplitCriticalEdge(terminator, succNum, this, false)) {
- BasicBlock* newBlock = terminator->getSuccessor(succNum);
- dag->splitUpdate(edge, newBlock);
- return(true);
- } else
- return(false);
-}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
deleted file mode 100644
index 4b3de6d..0000000
--- a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-//===- ProfilingUtils.cpp - Helper functions shared by profilers ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a few helper functions which are used by profile
-// instrumentation code to instrument the code. This allows the profiler pass
-// to worry about *what* to insert, and these functions take care of *how* to do
-// it.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ProfilingUtils.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-
-void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
- GlobalValue *Array,
- PointerType *arrayType) {
- LLVMContext &Context = MainFn->getContext();
- Type *ArgVTy =
- PointerType::getUnqual(Type::getInt8PtrTy(Context));
- PointerType *UIntPtr = arrayType ? arrayType :
- Type::getInt32PtrTy(Context);
- Module &M = *MainFn->getParent();
- Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context),
- Type::getInt32Ty(Context),
- ArgVTy, UIntPtr,
- Type::getInt32Ty(Context),
- (Type *)0);
-
- // This could force argc and argv into programs that wouldn't otherwise have
- // them, but instead we just pass null values in.
- std::vector<Value*> Args(4);
- Args[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Args[1] = Constant::getNullValue(ArgVTy);
-
- // Skip over any allocas in the entry block.
- BasicBlock *Entry = MainFn->begin();
- BasicBlock::iterator InsertPos = Entry->begin();
- while (isa<AllocaInst>(InsertPos)) ++InsertPos;
-
- std::vector<Constant*> GEPIndices(2,
- Constant::getNullValue(Type::getInt32Ty(Context)));
- unsigned NumElements = 0;
- if (Array) {
- Args[2] = ConstantExpr::getGetElementPtr(Array, GEPIndices);
- NumElements =
- cast<ArrayType>(Array->getType()->getElementType())->getNumElements();
- } else {
- // If this profiling instrumentation doesn't have a constant array, just
- // pass null.
- Args[2] = ConstantPointerNull::get(UIntPtr);
- }
- Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
-
- CallInst *InitCall = CallInst::Create(InitFn, Args, "newargc", InsertPos);
-
- // If argc or argv are not available in main, just pass null values in.
- Function::arg_iterator AI;
- switch (MainFn->arg_size()) {
- default:
- case 2:
- AI = MainFn->arg_begin(); ++AI;
- if (AI->getType() != ArgVTy) {
- Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
- false);
- InitCall->setArgOperand(1,
- CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
- } else {
- InitCall->setArgOperand(1, AI);
- }
- /* FALL THROUGH */
-
- case 1:
- AI = MainFn->arg_begin();
- // If the program looked at argc, have it look at the return value of the
- // init call instead.
- if (!AI->getType()->isIntegerTy(32)) {
- Instruction::CastOps opcode;
- if (!AI->use_empty()) {
- opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);
- AI->replaceAllUsesWith(
- CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos));
- }
- opcode = CastInst::getCastOpcode(AI, true,
- Type::getInt32Ty(Context), true);
- InitCall->setArgOperand(0,
- CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
- "argc.cast", InitCall));
- } else {
- AI->replaceAllUsesWith(InitCall);
- InitCall->setArgOperand(0, AI);
- }
-
- case 0: break;
- }
-}
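The init call constructed above implies a runtime hook of roughly this shape — inferred from the argument list rather than quoted from the runtime, using the FnName the path profiler passes in:

    // Returns the argc value the instrumented main should observe.
    extern "C" int llvm_start_path_profiling(int argc, char **argv,
                                             unsigned *counters,
                                             unsigned numElements);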
-
-void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
- GlobalValue *CounterArray, bool beginning) {
- // Insert the increment after any alloca or PHI instructions...
- BasicBlock::iterator InsertPos = beginning ? BB->getFirstInsertionPt() :
- BB->getTerminator();
- while (isa<AllocaInst>(InsertPos))
- ++InsertPos;
-
- LLVMContext &Context = BB->getContext();
-
- // Create the getelementptr constant expression
- std::vector<Constant*> Indices(2);
- Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
- Constant *ElementPtr =
- ConstantExpr::getGetElementPtr(CounterArray, Indices);
-
- // Load, increment and store the value back.
- Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
- Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal,
- ConstantInt::get(Type::getInt32Ty(Context), 1),
- "NewFuncCounter", InsertPos);
- new StoreInst(NewVal, ElementPtr, InsertPos);
-}
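In source terms, the load/add/store sequence emitted above is simply (illustrative):

    counters[CounterNum] += 1;  // load "OldFuncCounter", add 1, store back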
-
-void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
- // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those
- // types.
- Type *GlobalDtorElems[2] = {
- Type::getInt32Ty(Mod->getContext()),
- FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo()
- };
- StructType *GlobalDtorElemTy =
- StructType::get(Mod->getContext(), GlobalDtorElems, false);
-
- // Construct the new element we'll be adding.
- Constant *Elem[2] = {
- ConstantInt::get(Type::getInt32Ty(Mod->getContext()), 65535),
- ConstantExpr::getBitCast(Callee, GlobalDtorElems[1])
- };
-
- // If llvm.global_dtors exists, make a copy of the things in its list and
- // delete it, to replace it with one that has a larger array type.
- std::vector<Constant *> dtors;
- if (GlobalVariable *GlobalDtors = Mod->getNamedGlobal("llvm.global_dtors")) {
- if (ConstantArray *InitList =
- dyn_cast<ConstantArray>(GlobalDtors->getInitializer())) {
- for (unsigned i = 0, e = InitList->getType()->getNumElements();
- i != e; ++i)
- dtors.push_back(cast<Constant>(InitList->getOperand(i)));
- }
- GlobalDtors->eraseFromParent();
- }
-
- // Build up llvm.global_dtors with our new item in it.
- GlobalVariable *GlobalDtors = new GlobalVariable(
- *Mod, ArrayType::get(GlobalDtorElemTy, 1), false,
- GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors");
-
- dtors.push_back(ConstantStruct::get(GlobalDtorElemTy, Elem));
- GlobalDtors->setInitializer(ConstantArray::get(
- cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors));
-}
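Each element appended to llvm.global_dtors above pairs a priority with a function pointer; an illustrative C++ mirror (GlobalDtorElem here is a hypothetical name distinct from the IR StructType built above):

    #include <cstdint>

    struct GlobalDtorElem {
      int32_t priority;  // 65535, the default priority used above
      void (*dtor)();    // Callee bitcast to void()*
    };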
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
deleted file mode 100644
index 09b2217..0000000
--- a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
+++ /dev/null
@@ -1,36 +0,0 @@
-//===- ProfilingUtils.h - Helper functions shared by profilers --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a few helper functions which are used by profile
-// instrumentation code to instrument the code. This allows the profiler pass
-// to worry about *what* to insert, and these functions take care of *how* to do
-// it.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PROFILINGUTILS_H
-#define PROFILINGUTILS_H
-
-namespace llvm {
- class BasicBlock;
- class Function;
- class GlobalValue;
- class Module;
- class PointerType;
-
- void InsertProfilingInitCall(Function *MainFn, const char *FnName,
- GlobalValue *Arr = 0,
- PointerType *arrayType = 0);
- void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
- GlobalValue *CounterArray,
- bool beginning = true);
- void InsertProfilingShutdownCall(Function *Callee, Module *Mod);
-}
-
-#endif
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 299060a..89fb746 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -41,8 +41,8 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/BlackList.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/SpecialCaseList.h"
using namespace llvm;
@@ -99,7 +99,7 @@ struct ThreadSanitizer : public FunctionPass {
DataLayout *TD;
Type *IntptrTy;
SmallString<64> BlacklistFile;
- OwningPtr<BlackList> BL;
+ OwningPtr<SpecialCaseList> BL;
IntegerType *OrdTy;
// Callbacks to run-time library are computed in doInitialization.
Function *TsanFuncEntry;
@@ -227,7 +227,7 @@ bool ThreadSanitizer::doInitialization(Module &M) {
TD = getAnalysisIfAvailable<DataLayout>();
if (!TD)
return false;
- BL.reset(new BlackList(BlacklistFile));
+ BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
// Always insert a call to __tsan_init into the module's CTORs.
IRBuilder<> IRB(M.getContext());
@@ -240,12 +240,8 @@ bool ThreadSanitizer::doInitialization(Module &M) {
}
static bool isVtableAccess(Instruction *I) {
- if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) {
- if (Tag->getNumOperands() < 1) return false;
- if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
- if (Tag1->getString() == "vtable pointer") return true;
- }
- }
+ if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa))
+ return Tag->isTBAAVtableAccess();
return false;
}
@@ -362,7 +358,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
// (e.g. variables that do not escape, etc).
// Instrument memory accesses.
- if (ClInstrumentMemoryAccesses)
+ if (ClInstrumentMemoryAccesses && F.hasFnAttribute(Attribute::SanitizeThread))
for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) {
Res |= instrumentLoadOrStore(AllLoadsAndStores[i]);
}
@@ -579,7 +575,7 @@ int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr) {
// Ignore all unusual sizes.
return -1;
}
- size_t Idx = CountTrailingZeros_32(TypeSize / 8);
+ size_t Idx = countTrailingZeros(TypeSize / 8);
assert(Idx < kNumberOfAccessSizes);
return Idx;
}
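The countTrailingZeros call above maps an access size in bytes to a callback slot. A stand-in sketch of the mapping, assuming the usual power-of-two sizes (unusual sizes were already rejected):

    #include <cstddef>

    // 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3, 16 -> 4
    static size_t accessSizeIndex(size_t bytes) {
      size_t idx = 0;
      while (((bytes >> idx) & 1) == 0)
        ++idx;
      return idx;
    }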
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
new file mode 100644
index 0000000..4eac39d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -0,0 +1,186 @@
+//===- ARCRuntimeEntryPoints.h - ObjC ARC Optimization --*- C++ -*---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file contains a class ARCRuntimeEntryPoints for use in
+/// creating/managing references to entry points into the ARC Objective-C
+/// runtime.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_ARCRUNTIMEENTRYPOINTS_H
+#define LLVM_TRANSFORMS_SCALAR_ARCRUNTIMEENTRYPOINTS_H
+
+#include "ObjCARC.h"
+
+namespace llvm {
+namespace objcarc {
+
+/// Declarations for ObjC runtime functions and constants. These are initialized
+/// lazily to avoid cluttering up the Module with unused declarations.
+class ARCRuntimeEntryPoints {
+public:
+ enum EntryPointType {
+ EPT_AutoreleaseRV,
+ EPT_Release,
+ EPT_Retain,
+ EPT_RetainBlock,
+ EPT_Autorelease,
+ EPT_StoreStrong,
+ EPT_RetainRV,
+ EPT_RetainAutorelease,
+ EPT_RetainAutoreleaseRV
+ };
+
+ ARCRuntimeEntryPoints() : TheModule(0),
+ AutoreleaseRV(0),
+ Release(0),
+ Retain(0),
+ RetainBlock(0),
+ Autorelease(0),
+ StoreStrong(0),
+ RetainRV(0),
+ RetainAutorelease(0),
+ RetainAutoreleaseRV(0) { }
+
+ ~ARCRuntimeEntryPoints() { }
+
+ void Initialize(Module *M) {
+ TheModule = M;
+ AutoreleaseRV = 0;
+ Release = 0;
+ Retain = 0;
+ RetainBlock = 0;
+ Autorelease = 0;
+ StoreStrong = 0;
+ RetainRV = 0;
+ RetainAutorelease = 0;
+ RetainAutoreleaseRV = 0;
+ }
+
+ Constant *get(const EntryPointType entry) {
+ assert(TheModule != 0 && "Not initialized.");
+
+ switch (entry) {
+ case EPT_AutoreleaseRV:
+ return getI8XRetI8XEntryPoint(AutoreleaseRV,
+ "objc_autoreleaseReturnValue", true);
+ case EPT_Release:
+ return getVoidRetI8XEntryPoint(Release, "objc_release");
+ case EPT_Retain:
+ return getI8XRetI8XEntryPoint(Retain, "objc_retain", true);
+ case EPT_RetainBlock:
+ return getI8XRetI8XEntryPoint(RetainBlock, "objc_retainBlock", false);
+ case EPT_Autorelease:
+ return getI8XRetI8XEntryPoint(Autorelease, "objc_autorelease", true);
+ case EPT_StoreStrong:
+ return getI8XRetI8XXI8XEntryPoint(StoreStrong, "objc_storeStrong");
+ case EPT_RetainRV:
+ return getI8XRetI8XEntryPoint(RetainRV,
+ "objc_retainAutoreleasedReturnValue", true);
+ case EPT_RetainAutorelease:
+ return getI8XRetI8XEntryPoint(RetainAutorelease, "objc_retainAutorelease",
+ true);
+ case EPT_RetainAutoreleaseRV:
+ return getI8XRetI8XEntryPoint(RetainAutoreleaseRV,
+ "objc_retainAutoreleaseReturnValue", true);
+ }
+
+ llvm_unreachable("Switch should be a covered switch.");
+ }
+
+private:
+ /// Cached reference to the module which we will insert declarations into.
+ Module *TheModule;
+
+ /// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
+ Constant *AutoreleaseRV;
+ /// Declaration for ObjC runtime function objc_release.
+ Constant *Release;
+ /// Declaration for ObjC runtime function objc_retain.
+ Constant *Retain;
+ /// Declaration for ObjC runtime function objc_retainBlock.
+ Constant *RetainBlock;
+ /// Declaration for ObjC runtime function objc_autorelease.
+ Constant *Autorelease;
+ /// Declaration for objc_storeStrong().
+ Constant *StoreStrong;
+ /// Declaration for objc_retainAutoreleasedReturnValue().
+ Constant *RetainRV;
+ /// Declaration for objc_retainAutorelease().
+ Constant *RetainAutorelease;
+ /// Declaration for objc_retainAutoreleaseReturnValue().
+ Constant *RetainAutoreleaseRV;
+
+ Constant *getVoidRetI8XEntryPoint(Constant *&Decl,
+ const char *Name) {
+ if (Decl)
+ return Decl;
+
+ LLVMContext &C = TheModule->getContext();
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ AttributeSet Attr =
+ AttributeSet().addAttribute(C, AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ FunctionType *Fty = FunctionType::get(Type::getVoidTy(C), Params,
+ /*isVarArg=*/false);
+ return Decl = TheModule->getOrInsertFunction(Name, Fty, Attr);
+ }
+
+ Constant *getI8XRetI8XEntryPoint(Constant *& Decl,
+ const char *Name,
+ bool NoUnwind = false) {
+ if (Decl)
+ return Decl;
+
+ LLVMContext &C = TheModule->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *Params[] = { I8X };
+ FunctionType *Fty = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttributeSet Attr = AttributeSet();
+
+ if (NoUnwind)
+ Attr = Attr.addAttribute(C, AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+
+ return Decl = TheModule->getOrInsertFunction(Name, Fty, Attr);
+ }
+
+ Constant *getI8XRetI8XXI8XEntryPoint(Constant *&Decl,
+ const char *Name) {
+ if (Decl)
+ return Decl;
+
+ LLVMContext &C = TheModule->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+ Type *Params[] = { I8XX, I8X };
+
+ AttributeSet Attr =
+ AttributeSet().addAttribute(C, AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ Attr = Attr.addAttribute(C, 1, Attribute::NoCapture);
+
+ FunctionType *Fty = FunctionType::get(Type::getVoidTy(C), Params,
+ /*isVarArg=*/false);
+
+ return Decl = TheModule->getOrInsertFunction(Name, Fty, Attr);
+ }
+
+}; // class ARCRuntimeEntryPoints
+
+} // namespace objcarc
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_ARCRUNTIMEENTRYPOINTS_H
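Typical use, matching how the ObjCARC passes below adopt the class:

    ARCRuntimeEntryPoints EP;
    EP.Initialize(&M);  // once per module, e.g. in doInitialization
    // ... later, wherever a runtime callee is needed:
    Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
    cast<CallInst>(Inst)->setCalledFunction(Decl);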
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
index 24d358b..617cdf3 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -1,4 +1,4 @@
-//===- DependencyAnalysis.h - ObjC ARC Optimization ---*- mode: c++ -*-----===//
+//===- DependencyAnalysis.h - ObjC ARC Optimization ---*- C++ -*-----------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index 39670f3..8044494 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -1,4 +1,4 @@
-//===- ObjCARC.h - ObjC ARC Optimization --------------*- mode: c++ -*-----===//
+//===- ObjCARC.h - ObjC ARC Optimization --------------*- C++ -*-----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -286,7 +286,9 @@ static inline void EraseInstruction(Instruction *CI) {
if (!Unused) {
// Replace the return value with the argument.
- assert(IsForwarding(GetBasicInstructionClass(CI)) &&
+ assert((IsForwarding(GetBasicInstructionClass(CI)) ||
+ (IsNoopOnNull(GetBasicInstructionClass(CI)) &&
+ isa<ConstantPointerNull>(OldArg))) &&
"Can't delete non-forwarding instruction with users!");
CI->replaceAllUsesWith(OldArg);
}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
index 46b2de7..d18667b 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
@@ -1,4 +1,4 @@
-//===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -*- mode: c++ -*---===//
+//===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
index 7abe995..41ccfe2 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
@@ -1,4 +1,4 @@
-//===- ObjCARCAliasAnalysis.h - ObjC ARC Optimization -*- mode: c++ -*-----===//
+//===- ObjCARCAliasAnalysis.h - ObjC ARC Optimization -*- C++ -*-----------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index c43f4f4..9d80037 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -28,6 +28,7 @@
#define DEBUG_TYPE "objc-arc-contract"
#include "ObjCARC.h"
+#include "ARCRuntimeEntryPoints.h"
#include "DependencyAnalysis.h"
#include "ProvenanceAnalysis.h"
#include "llvm/ADT/Statistic.h"
@@ -52,23 +53,11 @@ namespace {
AliasAnalysis *AA;
DominatorTree *DT;
ProvenanceAnalysis PA;
+ ARCRuntimeEntryPoints EP;
/// A flag indicating whether this optimization pass should run.
bool Run;
- /// Declarations for ObjC runtime functions, for use in creating calls to
- /// them. These are initialized lazily to avoid cluttering up the Module
- /// with unused declarations.
-
- /// Declaration for objc_storeStrong().
- Constant *StoreStrongCallee;
- /// Declaration for objc_retainAutorelease().
- Constant *RetainAutoreleaseCallee;
- /// Declaration for objc_retainAutoreleaseReturnValue().
- Constant *RetainAutoreleaseRVCallee;
- /// Declaration for objc_retainAutoreleasedReturnValue().
- Constant *RetainRVCallee;
-
/// The inline asm string to insert between calls and RetainRV calls to make
/// the optimization work on targets which need it.
const MDString *RetainRVMarker;
@@ -78,11 +67,6 @@ namespace {
/// "tail".
SmallPtrSet<CallInst *, 8> StoreStrongCalls;
- Constant *getStoreStrongCallee(Module *M);
- Constant *getRetainRVCallee(Module *M);
- Constant *getRetainAutoreleaseCallee(Module *M);
- Constant *getRetainAutoreleaseRVCallee(Module *M);
-
bool OptimizeRetainCall(Function &F, Instruction *Retain);
bool ContractAutorelease(Function &F, Instruction *Autorelease,
@@ -125,74 +109,6 @@ void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
}
-Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
- if (!StoreStrongCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *I8XX = PointerType::getUnqual(I8X);
- Type *Params[] = { I8XX, I8X };
-
- AttributeSet Attr = AttributeSet()
- .addAttribute(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind)
- .addAttribute(M->getContext(), 1, Attribute::NoCapture);
-
- StoreStrongCallee =
- M->getOrInsertFunction(
- "objc_storeStrong",
- FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
- Attr);
- }
- return StoreStrongCallee;
-}
-
-Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
- if (!RetainAutoreleaseCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttributeSet Attribute =
- AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
- RetainAutoreleaseCallee =
- M->getOrInsertFunction("objc_retainAutorelease", FTy, Attribute);
- }
- return RetainAutoreleaseCallee;
-}
-
-Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
- if (!RetainAutoreleaseRVCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttributeSet Attribute =
- AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
- RetainAutoreleaseRVCallee =
- M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
- Attribute);
- }
- return RetainAutoreleaseRVCallee;
-}
-
-Constant *ObjCARCContract::getRetainRVCallee(Module *M) {
- if (!RetainRVCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttributeSet Attribute =
- AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
- RetainRVCallee =
- M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
- Attribute);
- }
- return RetainRVCallee;
-}
-
/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
/// return value. We do this late so we do not disrupt the dataflow analysis in
/// ObjCARCOpt.
@@ -222,7 +138,8 @@ ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) {
// We do not have to worry about tail calls/does not throw since
// retain/retainRV have the same properties.
- cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+ Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_RetainRV);
+ cast<CallInst>(Retain)->setCalledFunction(Decl);
DEBUG(dbgs() << "New: " << *Retain << "\n");
return true;
@@ -272,10 +189,10 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
" Old Retain: "
<< *Retain << "\n");
- if (Class == IC_AutoreleaseRV)
- Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent()));
- else
- Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent()));
+ Constant *Decl = EP.get(Class == IC_AutoreleaseRV ?
+ ARCRuntimeEntryPoints::EPT_RetainAutoreleaseRV :
+ ARCRuntimeEntryPoints::EPT_RetainAutorelease);
+ Retain->setCalledFunction(Decl);
DEBUG(dbgs() << " New Retain: "
<< *Retain << "\n");
@@ -356,9 +273,8 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
if (Args[1]->getType() != I8X)
Args[1] = new BitCastInst(Args[1], I8X, "", Store);
- CallInst *StoreStrong =
- CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()),
- Args, "", Store);
+ Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_StoreStrong);
+ CallInst *StoreStrong = CallInst::Create(Decl, Args, "", Store);
StoreStrong->setDoesNotThrow();
StoreStrong->setDebugLoc(Store->getDebugLoc());
@@ -381,11 +297,7 @@ bool ObjCARCContract::doInitialization(Module &M) {
if (!Run)
return false;
- // These are initialized lazily.
- StoreStrongCallee = 0;
- RetainAutoreleaseCallee = 0;
- RetainAutoreleaseRVCallee = 0;
- RetainRVCallee = 0;
+ EP.Initialize(&M);
// Initialize RetainRVMarker.
RetainRVMarker = 0;
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 43e2e20..2976df6 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -26,10 +26,12 @@
#define DEBUG_TYPE "objc-arc-opts"
#include "ObjCARC.h"
+#include "ARCRuntimeEntryPoints.h"
#include "DependencyAnalysis.h"
#include "ObjCARCAliasAnalysis.h"
#include "ProvenanceAnalysis.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -107,6 +109,12 @@ namespace {
return std::make_pair(Vector.begin() + Pair.first->second, false);
}
+ iterator find(const KeyT &Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end()) return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
const_iterator find(const KeyT &Key) const {
typename MapTy::const_iterator It = Map.find(Key);
if (It == Map.end()) return Vector.end();
@@ -168,91 +176,40 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
return 0;
}
-/// \brief Test whether the given retainable object pointer escapes.
-///
-/// This differs from regular escape analysis in that a use as an
-/// argument to a call is not considered an escape.
-///
-static bool DoesRetainableObjPtrEscape(const User *Ptr) {
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Target: " << *Ptr << "\n");
-
- // Walk the def-use chains.
+/// This is a wrapper around getUnderlyingObjCPtr along the lines of
+/// GetUnderlyingObjects except that it returns early when it sees the first
+/// alloca.
+static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) {
+ SmallPtrSet<const Value *, 4> Visited;
SmallVector<const Value *, 4> Worklist;
- Worklist.push_back(Ptr);
- // If Ptr has any operands add them as well.
- for (User::const_op_iterator I = Ptr->op_begin(), E = Ptr->op_end(); I != E;
- ++I) {
- Worklist.push_back(*I);
- }
-
- // Ensure we do not visit any value twice.
- SmallPtrSet<const Value *, 8> VisitedSet;
-
+ Worklist.push_back(V);
do {
- const Value *V = Worklist.pop_back_val();
+ const Value *P = Worklist.pop_back_val();
+ P = GetUnderlyingObjCPtr(P);
- DEBUG(dbgs() << "Visiting: " << *V << "\n");
+ if (isa<AllocaInst>(P))
+ return true;
- for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
- UI != UE; ++UI) {
- const User *UUser = *UI;
+ if (!Visited.insert(P))
+ continue;
- DEBUG(dbgs() << "User: " << *UUser << "\n");
+ if (const SelectInst *SI = dyn_cast<const SelectInst>(P)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
- // Special - Use by a call (callee or argument) is not considered
- // to be an escape.
- switch (GetBasicInstructionClass(UUser)) {
- case IC_StoreWeak:
- case IC_InitWeak:
- case IC_StoreStrong:
- case IC_Autorelease:
- case IC_AutoreleaseRV: {
- DEBUG(dbgs() << "User copies pointer arguments. Pointer Escapes!\n");
- // These special functions make copies of their pointer arguments.
- return true;
- }
- case IC_IntrinsicUser:
- // Use by the use intrinsic is not an escape.
- continue;
- case IC_User:
- case IC_None:
- // Use by an instruction which copies the value is an escape if the
- // result is an escape.
- if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
- isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
-
- if (VisitedSet.insert(UUser)) {
- DEBUG(dbgs() << "User copies value. Ptr escapes if result escapes."
- " Adding to list.\n");
- Worklist.push_back(UUser);
- } else {
- DEBUG(dbgs() << "Already visited node.\n");
- }
- continue;
- }
- // Use by a load is not an escape.
- if (isa<LoadInst>(UUser))
- continue;
- // Use by a store is not an escape if the use is the address.
- if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
- if (V != SI->getValueOperand())
- continue;
- break;
- default:
- // Regular calls and other stuff are not considered escapes.
- continue;
- }
- // Otherwise, conservatively assume an escape.
- DEBUG(dbgs() << "Assuming ptr escapes.\n");
- return true;
+ if (const PHINode *PN = dyn_cast<const PHINode>(P)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Worklist.push_back(PN->getIncomingValue(i));
+ continue;
}
} while (!Worklist.empty());
- // No escapes found.
- DEBUG(dbgs() << "Ptr does not escape.\n");
return false;
}
+
/// @}
///
/// \defgroup ARCOpt ARC Optimization.
@@ -300,18 +257,18 @@ STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases");
STATISTIC(NumRets, "Number of return value forwarding "
- "retain+autoreleaes eliminated");
+ "retain+autoreleases eliminated");
STATISTIC(NumRRs, "Number of retain+release paths eliminated");
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+#ifndef NDEBUG
STATISTIC(NumRetainsBeforeOpt,
- "Number of retains before optimization.");
+ "Number of retains before optimization");
STATISTIC(NumReleasesBeforeOpt,
- "Number of releases before optimization.");
-#ifndef NDEBUG
+ "Number of releases before optimization");
STATISTIC(NumRetainsAfterOpt,
- "Number of retains after optimization.");
+ "Number of retains after optimization");
STATISTIC(NumReleasesAfterOpt,
- "Number of releases after optimization.");
+ "Number of releases after optimization");
#endif
namespace {
@@ -414,14 +371,20 @@ namespace {
/// sequence.
SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+ /// If this is true, we cannot perform code motion but can still remove
+ /// retain/release pairs.
+ bool CFGHazardAfflicted;
+
RRInfo() :
- KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {}
+ KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0),
+ CFGHazardAfflicted(false) {}
void clear();
- bool IsTrackingImpreciseReleases() {
- return ReleaseMetadata != 0;
- }
+ /// Conservatively merge the two RRInfos. Returns true if a partial merge has
+ /// occurred, false otherwise.
+ bool Merge(const RRInfo &Other);
+
};
}
@@ -431,6 +394,30 @@ void RRInfo::clear() {
ReleaseMetadata = 0;
Calls.clear();
ReverseInsertPts.clear();
+ CFGHazardAfflicted = false;
+}
+
+bool RRInfo::Merge(const RRInfo &Other) {
+ // Conservatively merge the ReleaseMetadata information.
+ if (ReleaseMetadata != Other.ReleaseMetadata)
+ ReleaseMetadata = 0;
+
+ // Conservatively merge the boolean state.
+ KnownSafe &= Other.KnownSafe;
+ IsTailCallRelease &= Other.IsTailCallRelease;
+ CFGHazardAfflicted |= Other.CFGHazardAfflicted;
+
+ // Merge the call sets.
+ Calls.insert(Other.Calls.begin(), Other.Calls.end());
+
+ // Merge the insert point sets. If there are any differences,
+ // that makes this a partial merge.
+ bool Partial = ReverseInsertPts.size() != Other.ReverseInsertPts.size();
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ I = Other.ReverseInsertPts.begin(),
+ E = Other.ReverseInsertPts.end(); I != E; ++I)
+ Partial |= ReverseInsertPts.insert(*I);
+ return Partial;
}
namespace {
@@ -445,22 +432,59 @@ namespace {
bool Partial;
/// The current position in the sequence.
- Sequence Seq : 8;
+ unsigned char Seq : 8;
- public:
/// Unidirectional information about the current sequence.
- ///
- /// TODO: Encapsulate this better.
RRInfo RRI;
+ public:
PtrState() : KnownPositiveRefCount(false), Partial(false),
Seq(S_None) {}
+
+ bool IsKnownSafe() const {
+ return RRI.KnownSafe;
+ }
+
+ void SetKnownSafe(const bool NewValue) {
+ RRI.KnownSafe = NewValue;
+ }
+
+ bool IsTailCallRelease() const {
+ return RRI.IsTailCallRelease;
+ }
+
+ void SetTailCallRelease(const bool NewValue) {
+ RRI.IsTailCallRelease = NewValue;
+ }
+
+ bool IsTrackingImpreciseReleases() const {
+ return RRI.ReleaseMetadata != 0;
+ }
+
+ const MDNode *GetReleaseMetadata() const {
+ return RRI.ReleaseMetadata;
+ }
+
+ void SetReleaseMetadata(MDNode *NewValue) {
+ RRI.ReleaseMetadata = NewValue;
+ }
+
+ bool IsCFGHazardAfflicted() const {
+ return RRI.CFGHazardAfflicted;
+ }
+
+ void SetCFGHazardAfflicted(const bool NewValue) {
+ RRI.CFGHazardAfflicted = NewValue;
+ }
+
void SetKnownPositiveRefCount() {
+ DEBUG(dbgs() << "Setting Known Positive.\n");
KnownPositiveRefCount = true;
}
void ClearKnownPositiveRefCount() {
+ DEBUG(dbgs() << "Clearing Known Positive.\n");
KnownPositiveRefCount = false;
}
@@ -474,7 +498,7 @@ namespace {
}
Sequence GetSeq() const {
- return Seq;
+ return static_cast<Sequence>(Seq);
}
void ClearSequenceProgress() {
@@ -489,13 +513,34 @@ namespace {
}
void Merge(const PtrState &Other, bool TopDown);
+
+ void InsertCall(Instruction *I) {
+ RRI.Calls.insert(I);
+ }
+
+ void InsertReverseInsertPt(Instruction *I) {
+ RRI.ReverseInsertPts.insert(I);
+ }
+
+ void ClearReverseInsertPts() {
+ RRI.ReverseInsertPts.clear();
+ }
+
+ bool HasReverseInsertPts() const {
+ return !RRI.ReverseInsertPts.empty();
+ }
+
+ const RRInfo &GetRRInfo() const {
+ return RRI;
+ }
};
}
void
PtrState::Merge(const PtrState &Other, bool TopDown) {
- Seq = MergeSeqs(Seq, Other.Seq, TopDown);
- KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
+ Seq = MergeSeqs(static_cast<Sequence>(Seq), static_cast<Sequence>(Other.Seq),
+ TopDown);
+ KnownPositiveRefCount &= Other.KnownPositiveRefCount;
// If we're not in a sequence (anymore), drop all associated state.
if (Seq == S_None) {
@@ -508,22 +553,11 @@ PtrState::Merge(const PtrState &Other, bool TopDown) {
// mixing them is unsafe.
ClearSequenceProgress();
} else {
- // Conservatively merge the ReleaseMetadata information.
- if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
- RRI.ReleaseMetadata = 0;
-
- RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
- RRI.IsTailCallRelease = RRI.IsTailCallRelease &&
- Other.RRI.IsTailCallRelease;
- RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
-
- // Merge the insert point sets. If there are any differences,
- // that makes this a partial merge.
- Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size();
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- I = Other.RRI.ReverseInsertPts.begin(),
- E = Other.RRI.ReverseInsertPts.end(); I != E; ++I)
- Partial |= RRI.ReverseInsertPts.insert(*I);
+ // Otherwise merge the other PtrState's RRInfo into our RRInfo. At this
+ // point, we know that currently we are not partial. Stash whether or not
+ // the merge operation caused us to undergo a partial merging of reverse
+ // insertion points.
+ Partial = RRI.Merge(Other.RRI);
}
}
@@ -556,7 +590,9 @@ namespace {
SmallVector<BasicBlock *, 2> Succs;
public:
- BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
+ static const unsigned OverflowOccurredValue;
+
+ BBState() : TopDownPathCount(0), BottomUpPathCount(0) { }
typedef MapTy::iterator ptr_iterator;
typedef MapTy::const_iterator ptr_const_iterator;
@@ -587,14 +623,26 @@ namespace {
/// definition.
void SetAsExit() { BottomUpPathCount = 1; }
+ /// Attempt to find the PtrState object describing the top down state for
+ /// pointer Arg. Return a new initialized PtrState describing the top down
+ /// state for Arg if we do not find one.
PtrState &getPtrTopDownState(const Value *Arg) {
return PerPtrTopDown[Arg];
}
+ /// Attempt to find the PtrState object describing the bottom up state for
+ /// pointer Arg. Return a new initialized PtrState describing the bottom up
+ /// state for Arg if we do not find one.
PtrState &getPtrBottomUpState(const Value *Arg) {
return PerPtrBottomUp[Arg];
}
+ /// Attempt to find the PtrState object describing the bottom up state for
+ /// pointer Arg.
+ ptr_iterator findPtrBottomUpState(const Value *Arg) {
+ return PerPtrBottomUp.find(Arg);
+ }
+
void clearBottomUpPointers() {
PerPtrBottomUp.clear();
}
@@ -608,27 +656,38 @@ namespace {
void MergePred(const BBState &Other);
void MergeSucc(const BBState &Other);
- /// Return the number of possible unique paths from an entry to an exit
+ /// Compute the number of possible unique paths from an entry to an exit
/// which pass through this block. This is only valid after both the
/// top-down and bottom-up traversals are complete.
- unsigned GetAllPathCount() const {
- assert(TopDownPathCount != 0);
- assert(BottomUpPathCount != 0);
- return TopDownPathCount * BottomUpPathCount;
+ ///
+ /// Returns true if overflow occurred. Returns false if overflow did not
+ /// occur.
+ bool GetAllPathCountWithOverflow(unsigned &PathCount) const {
+ if (TopDownPathCount == OverflowOccurredValue ||
+ BottomUpPathCount == OverflowOccurredValue)
+ return true;
+ unsigned long long Product =
+ (unsigned long long)TopDownPathCount*BottomUpPathCount;
+ // Overflow occurred if any of the upper bits of Product are set, or if
+ // all the lower bits of Product are set.
+ return (Product >> 32) ||
+ ((PathCount = Product) == OverflowOccurredValue);
}
// Specialized CFG utilities.
typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator;
- edge_iterator pred_begin() { return Preds.begin(); }
- edge_iterator pred_end() { return Preds.end(); }
- edge_iterator succ_begin() { return Succs.begin(); }
- edge_iterator succ_end() { return Succs.end(); }
+ edge_iterator pred_begin() const { return Preds.begin(); }
+ edge_iterator pred_end() const { return Preds.end(); }
+ edge_iterator succ_begin() const { return Succs.begin(); }
+ edge_iterator succ_end() const { return Succs.end(); }
void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }
bool isExit() const { return Succs.empty(); }
};
+
+ const unsigned BBState::OverflowOccurredValue = 0xffffffff;
}
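The overflow test in GetAllPathCountWithOverflow, isolated as a sketch (mulPathCounts is a hypothetical name; a product is rejected both when it spills past 32 bits and when it lands exactly on the 0xffffffff sentinel, since that value doubles as OverflowOccurredValue):

    #include <cstdint>

    static bool mulPathCounts(uint32_t a, uint32_t b, uint32_t &out) {
      unsigned long long p = (unsigned long long)a * b;  // widen to 64 bits
      out = (uint32_t)p;
      return (p >> 32) != 0 || out == 0xffffffffu;       // true == overflow
    }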
void BBState::InitFromPred(const BBState &Other) {
@@ -644,13 +703,25 @@ void BBState::InitFromSucc(const BBState &Other) {
/// The top-down traversal uses this to merge information about predecessors to
/// form the initial state for a new block.
void BBState::MergePred(const BBState &Other) {
+ if (TopDownPathCount == OverflowOccurredValue)
+ return;
+
// Other.TopDownPathCount can be 0, in which case it is either dead or a
// loop backedge. Loop backedges are special.
TopDownPathCount += Other.TopDownPathCount;
+ // To stay consistent, we clear the top down pointers when the addition
+ // makes TopDownPathCount equal OverflowOccurredValue, even though "true"
+ // overflow has not occurred.
+ if (TopDownPathCount == OverflowOccurredValue) {
+ clearTopDownPointers();
+ return;
+ }
+
// Check for overflow. If we have overflow, fall back to conservative
// behavior.
if (TopDownPathCount < Other.TopDownPathCount) {
+ TopDownPathCount = OverflowOccurredValue;
clearTopDownPointers();
return;
}
@@ -676,13 +747,25 @@ void BBState::MergePred(const BBState &Other) {
/// The bottom-up traversal uses this to merge information about successors to
/// form the initial state for a new block.
void BBState::MergeSucc(const BBState &Other) {
+ if (BottomUpPathCount == OverflowOccurredValue)
+ return;
+
// Other.BottomUpPathCount can be 0, in which case it is either dead or a
// loop backedge. Loop backedges are special.
BottomUpPathCount += Other.BottomUpPathCount;
+ // To stay consistent, we clear the bottom up pointers when the addition
+ // makes BottomUpPathCount equal OverflowOccurredValue, even though "true"
+ // overflow has not occurred.
+ if (BottomUpPathCount == OverflowOccurredValue) {
+ clearBottomUpPointers();
+ return;
+ }
+
// Check for overflow. If we have overflow, fall back to conservative
// behavior.
if (BottomUpPathCount < Other.BottomUpPathCount) {
+ BottomUpPathCount = OverflowOccurredValue;
clearBottomUpPointers();
return;
}
@@ -991,25 +1074,14 @@ namespace {
class ObjCARCOpt : public FunctionPass {
bool Changed;
ProvenanceAnalysis PA;
+ ARCRuntimeEntryPoints EP;
+
+ // This is used to track if a pointer is stored into an alloca.
+ DenseSet<const Value *> MultiOwnersSet;
/// A flag indicating whether this optimization pass should run.
bool Run;
- /// Declarations for ObjC runtime functions, for use in creating calls to
- /// them. These are initialized lazily to avoid cluttering up the Module
- /// with unused declarations.
-
- /// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
- Constant *AutoreleaseRVCallee;
- /// Declaration for ObjC runtime function objc_release.
- Constant *ReleaseCallee;
- /// Declaration for ObjC runtime function objc_retain.
- Constant *RetainCallee;
- /// Declaration for ObjC runtime function objc_retainBlock.
- Constant *RetainBlockCallee;
- /// Declaration for ObjC runtime function objc_autorelease.
- Constant *AutoreleaseCallee;
-
/// Flags which determine whether each of the interesting runtime functions
/// is in fact used in the current function.
unsigned UsedInThisFunction;
@@ -1032,19 +1104,9 @@ namespace {
unsigned ARCAnnotationProvenanceSourceMDKind;
#endif // ARC_ANNOATIONS
- Constant *getAutoreleaseRVCallee(Module *M);
- Constant *getReleaseCallee(Module *M);
- Constant *getRetainCallee(Module *M);
- Constant *getRetainBlockCallee(Module *M);
- Constant *getAutoreleaseCallee(Module *M);
-
- bool IsRetainBlockOptimizable(const Instruction *Inst);
-
bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
InstructionClass &Class);
- bool OptimizeRetainBlockCall(Function &F, Instruction *RetainBlock,
- InstructionClass &Class);
void OptimizeIndividualCalls(Function &F);
void CheckForCFGHazards(const BasicBlock *BB,
@@ -1078,9 +1140,9 @@ namespace {
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
Module *M,
- SmallVector<Instruction *, 4> &NewRetains,
- SmallVector<Instruction *, 4> &NewReleases,
- SmallVector<Instruction *, 8> &DeadInsts,
+ SmallVectorImpl<Instruction *> &NewRetains,
+ SmallVectorImpl<Instruction *> &NewReleases,
+ SmallVectorImpl<Instruction *> &DeadInsts,
RRInfo &RetainsToMove,
RRInfo &ReleasesToMove,
Value *Arg,
@@ -1133,101 +1195,6 @@ void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
}
-bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
- // Without the magic metadata tag, we have to assume this might be an
- // objc_retainBlock call inserted to convert a block pointer to an id,
- // in which case it really is needed.
- if (!Inst->getMetadata(CopyOnEscapeMDKind))
- return false;
-
- // If the pointer "escapes" (not including being used in a call),
- // the copy may be needed.
- if (DoesRetainableObjPtrEscape(Inst))
- return false;
-
- // Otherwise, it's not needed.
- return true;
-}
-
-Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
- if (!AutoreleaseRVCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttributeSet Attribute =
- AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
- AutoreleaseRVCallee =
- M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
- Attribute);
- }
- return AutoreleaseRVCallee;
-}
-
-Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
- if (!ReleaseCallee) {
- LLVMContext &C = M->getContext();
- Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- AttributeSet Attribute =
- AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
- ReleaseCallee =
- M->getOrInsertFunction(
- "objc_release",
- FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
- Attribute);
- }
- return ReleaseCallee;
-}
-
-Constant *ObjCARCOpt::getRetainCallee(Module *M) {
- if (!RetainCallee) {
- LLVMContext &C = M->getContext();
- Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- AttributeSet Attribute =
- AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
- RetainCallee =
- M->getOrInsertFunction(
- "objc_retain",
- FunctionType::get(Params[0], Params, /*isVarArg=*/false),
- Attribute);
- }
- return RetainCallee;
-}
-
-Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
- if (!RetainBlockCallee) {
- LLVMContext &C = M->getContext();
- Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- // objc_retainBlock is not nounwind because it calls user copy constructors
- // which could theoretically throw.
- RetainBlockCallee =
- M->getOrInsertFunction(
- "objc_retainBlock",
- FunctionType::get(Params[0], Params, /*isVarArg=*/false),
- AttributeSet());
- }
- return RetainBlockCallee;
-}
-
-Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
- if (!AutoreleaseCallee) {
- LLVMContext &C = M->getContext();
- Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- AttributeSet Attribute =
- AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
- AutoreleaseCallee =
- M->getOrInsertFunction(
- "objc_autorelease",
- FunctionType::get(Params[0], Params, /*isVarArg=*/false),
- Attribute);
- }
- return AutoreleaseCallee;
-}
-
/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
/// not a return value. Or, if it can be paired with an
/// objc_autoreleaseReturnValue, delete the pair and return true.
@@ -1281,7 +1248,8 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
"objc_retain since the operand is not a return value.\n"
"Old = " << *RetainRV << "\n");
- cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
+ Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ cast<CallInst>(RetainRV)->setCalledFunction(NewDecl);
DEBUG(dbgs() << "New = " << *RetainRV << "\n");
@@ -1318,8 +1286,8 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
"Old = " << *AutoreleaseRV << "\n");
CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
- AutoreleaseRVCI->
- setCalledFunction(getAutoreleaseCallee(F.getParent()));
+ Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Autorelease);
+ AutoreleaseRVCI->setCalledFunction(NewDecl);
AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
Class = IC_Autorelease;
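
Both rewrites above fetch the callee through the new ARCRuntimeEntryPoints
cache rather than through a per-entry-point getter. A sketch of the caching
idea behind EP.get, using hypothetical names (EntryPointKind, EntryPointCache)
rather than the real class:

#include <map>
#include <string>

enum class EntryPointKind { Retain, Release, Autorelease };

// Stands in for ARCRuntimeEntryPoints; the string plays the role of the
// Constant* declaration the real cache hands back.
class EntryPointCache {
  std::map<EntryPointKind, std::string> Decls;

public:
  const std::string &get(EntryPointKind K) {
    auto It = Decls.find(K);
    if (It != Decls.end())
      return It->second;               // Declaration already materialized.
    const char *Name = K == EntryPointKind::Retain    ? "objc_retain"
                     : K == EntryPointKind::Release   ? "objc_release"
                                                      : "objc_autorelease";
    return Decls.emplace(K, Name).first->second;
  }
};
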
@@ -1327,40 +1295,6 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
}
-// \brief Attempt to strength reduce objc_retainBlock calls to objc_retain
-// calls.
-//
-// Specifically: If an objc_retainBlock call has the copy_on_escape metadata and
-// does not escape (following the rules of block escaping), strength reduce the
-// objc_retainBlock to an objc_retain.
-//
-// TODO: If an objc_retainBlock call is dominated period by a previous
-// objc_retainBlock call, strength reduce the objc_retainBlock to an
-// objc_retain.
-bool
-ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst,
- InstructionClass &Class) {
- assert(GetBasicInstructionClass(Inst) == Class);
- assert(IC_RetainBlock == Class);
-
- // If we can not optimize Inst, return false.
- if (!IsRetainBlockOptimizable(Inst))
- return false;
-
- Changed = true;
- ++NumPeeps;
-
- DEBUG(dbgs() << "Strength reduced retainBlock => retain.\n");
- DEBUG(dbgs() << "Old: " << *Inst << "\n");
- CallInst *RetainBlock = cast<CallInst>(Inst);
- RetainBlock->setCalledFunction(getRetainCallee(F.getParent()));
- // Remove copy_on_escape metadata.
- RetainBlock->setMetadata(CopyOnEscapeMDKind, 0);
- Class = IC_Retain;
- DEBUG(dbgs() << "New: " << *Inst << "\n");
- return true;
-}
-
/// Visit each call, one at a time, and make simplifications without doing any
/// additional analysis.
void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
@@ -1437,15 +1371,6 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
}
break;
}
- case IC_RetainBlock:
- // If we strength reduce an objc_retainBlock to an objc_retain, continue
- // onto the objc_retain peephole optimizations. Otherwise break.
- if (!OptimizeRetainBlockCall(F, Inst, Class))
- break;
- // FALLTHROUGH
- case IC_Retain:
- ++NumRetainsBeforeOpt;
- break;
case IC_RetainRV:
if (OptimizeRetainRVCall(F, Inst))
continue;
@@ -1453,9 +1378,6 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_AutoreleaseRV:
OptimizeAutoreleaseRVCall(F, Inst, Class);
break;
- case IC_Release:
- ++NumReleasesBeforeOpt;
- break;
}
// objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
@@ -1469,9 +1391,10 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Create the declaration lazily.
LLVMContext &C = Inst->getContext();
- CallInst *NewCall =
- CallInst::Create(getReleaseCallee(F.getParent()),
- Call->getArgOperand(0), "", Call);
+
+ Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Release);
+ CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "",
+ Call);
NewCall->setMetadata(ImpreciseReleaseMDKind, MDNode::get(C, None));
DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
@@ -1639,13 +1562,15 @@ static void CheckForUseCFGHazard(const Sequence SuccSSeq,
PtrState &S,
bool &SomeSuccHasSame,
bool &AllSuccsHaveSame,
+ bool &NotAllSeqEqualButKnownSafe,
bool &ShouldContinue) {
switch (SuccSSeq) {
case S_CanRelease: {
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
+ if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) {
S.ClearSequenceProgress();
break;
}
+ S.SetCFGHazardAfflicted(true);
ShouldContinue = true;
break;
}
@@ -1655,8 +1580,10 @@ static void CheckForUseCFGHazard(const Sequence SuccSSeq,
case S_Stop:
case S_Release:
case S_MovableRelease:
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+ if (!S.IsKnownSafe() && !SuccSRRIKnownSafe)
AllSuccsHaveSame = false;
+ else
+ NotAllSeqEqualButKnownSafe = true;
break;
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
@@ -1672,7 +1599,8 @@ static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
const bool SuccSRRIKnownSafe,
PtrState &S,
bool &SomeSuccHasSame,
- bool &AllSuccsHaveSame) {
+ bool &AllSuccsHaveSame,
+ bool &NotAllSeqEqualButKnownSafe) {
switch (SuccSSeq) {
case S_CanRelease:
SomeSuccHasSame = true;
@@ -1681,8 +1609,10 @@ static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
case S_Release:
case S_MovableRelease:
case S_Use:
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+ if (!S.IsKnownSafe() && !SuccSRRIKnownSafe)
AllSuccsHaveSame = false;
+ else
+ NotAllSeqEqualButKnownSafe = true;
break;
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
@@ -1718,6 +1648,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
bool SomeSuccHasSame = false;
bool AllSuccsHaveSame = true;
+ bool NotAllSeqEqualButKnownSafe = false;
succ_const_iterator SI(TI), SE(TI, false);
@@ -1742,24 +1673,24 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// If we have S_Use or S_CanRelease, perform our check for cfg hazard
// checks.
- const bool SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+ const bool SuccSRRIKnownSafe = SuccS.IsKnownSafe();
// *NOTE* We do not use Seq from above here since we are allowing for
// S.GetSeq() to change while we are visiting basic blocks.
switch(S.GetSeq()) {
case S_Use: {
bool ShouldContinue = false;
- CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
- SomeSuccHasSame, AllSuccsHaveSame,
+ CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, SomeSuccHasSame,
+ AllSuccsHaveSame, NotAllSeqEqualButKnownSafe,
ShouldContinue);
if (ShouldContinue)
continue;
break;
}
case S_CanRelease: {
- CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe,
- S, SomeSuccHasSame,
- AllSuccsHaveSame);
+ CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
+ SomeSuccHasSame, AllSuccsHaveSame,
+ NotAllSeqEqualButKnownSafe);
break;
}
case S_Retain:
@@ -1774,8 +1705,15 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// If the state at the other end of any of the successor edges
// matches the current state, require all edges to match. This
// guards against loops in the middle of a sequence.
- if (SomeSuccHasSame && !AllSuccsHaveSame)
+ if (SomeSuccHasSame && !AllSuccsHaveSame) {
S.ClearSequenceProgress();
+ } else if (NotAllSeqEqualButKnownSafe) {
+    // If we would have cleared the state, except for the fact that we are
+    // known safe, stop code motion. Whether or not it is safe to remove RR
+    // pairs via KnownSafe is orthogonal to whether we are allowed to perform
+    // code motion.
+ S.SetCFGHazardAfflicted(true);
+ }
}
}
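
The three flags threaded through the hazard checks interact as follows: a
matching successor sets SomeSuccHasSame, an unsafe mismatch clears
AllSuccsHaveSame, and a mismatch rescued only by KnownSafe sets
NotAllSeqEqualButKnownSafe, which blocks code motion without clearing the
sequence. A toy model of that successor scan (the real PtrState machinery is
not reproduced):

#include <utility>
#include <vector>

enum Seq { S_None, S_Use, S_CanRelease };

struct HazardFlags {
  bool SomeSuccHasSame = false;
  bool AllSuccsHaveSame = true;
  bool NotAllSeqEqualButKnownSafe = false;
};

// Each successor contributes its sequence state and whether it is known safe.
HazardFlags scanSuccessors(Seq Cur,
                           const std::vector<std::pair<Seq, bool>> &Succs) {
  HazardFlags F;
  for (const auto &S : Succs) {
    if (S.first == Cur)
      F.SomeSuccHasSame = true;             // An edge matches our state.
    else if (!S.second)
      F.AllSuccsHaveSame = false;           // Unsafe mismatch: must clear.
    else
      F.NotAllSeqEqualButKnownSafe = true;  // Safe mismatch: no code motion.
  }
  return F;
}
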
@@ -1812,10 +1750,10 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release;
ANNOTATE_BOTTOMUP(Inst, Arg, S.GetSeq(), NewSeq);
S.ResetSequenceProgress(NewSeq);
- S.RRI.ReleaseMetadata = ReleaseMetadata;
- S.RRI.KnownSafe = S.HasKnownPositiveRefCount();
- S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
- S.RRI.Calls.insert(Inst);
+ S.SetReleaseMetadata(ReleaseMetadata);
+ S.SetKnownSafe(S.HasKnownPositiveRefCount());
+ S.SetTailCallRelease(cast<CallInst>(Inst)->isTailCall());
+ S.InsertCall(Inst);
S.SetKnownPositiveRefCount();
break;
}
@@ -1839,14 +1777,14 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
case S_Use:
// If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an
// imprecise release, clear our reverse insertion points.
- if (OldSeq != S_Use || S.RRI.IsTrackingImpreciseReleases())
- S.RRI.ReverseInsertPts.clear();
+ if (OldSeq != S_Use || S.IsTrackingImpreciseReleases())
+ S.ClearReverseInsertPts();
// FALL THROUGH
case S_CanRelease:
// Don't do retain+release tracking for IC_RetainRV, because it's
// better to let it remain as the first instruction after a call.
if (Class != IC_RetainRV)
- Retains[Inst] = S.RRI;
+ Retains[Inst] = S.GetRRInfo();
S.ClearSequenceProgress();
break;
case S_None:
@@ -1866,6 +1804,28 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
case IC_None:
// These are irrelevant.
return NestingDetected;
+ case IC_User:
+ // If we have a store into an alloca of a pointer we are tracking, the
+ // pointer has multiple owners implying that we must be more conservative.
+ //
+    // This comes up in the context of a pointer being ``KnownSafe''. In the
+    // presence of a block being initialized, the frontend will emit the
+    // objc_retain on the original pointer and the release on the pointer
+    // loaded from the alloca. The optimizer will, through the provenance
+    // analysis, realize that the two are related, but since we only require
+    // KnownSafe in one direction, it will match the inner retain on the
+    // original pointer with the guard release on the original pointer. This
+    // is fixed by ensuring that in the presence of allocas we only
+    // unconditionally remove pointers if both our retain and our release are
+    // KnownSafe.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand())) {
+ BBState::ptr_iterator I = MyStates.findPtrBottomUpState(
+ StripPointerCastsAndObjCCalls(SI->getValueOperand()));
+ if (I != MyStates.bottom_up_ptr_end())
+ MultiOwnersSet.insert(I->first);
+ }
+ }
+ break;
default:
break;
}
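
The store-into-alloca bookkeeping above feeds the pairing logic later in the
pass: once a pointer lands in MultiOwnersSet, KnownSafe in a single direction
is no longer enough. A small model of that policy; OwnerTracker and noteStore
are illustrative names, not LLVM APIs:

#include <set>
#include <string>

struct OwnerTracker {
  std::set<std::string> MultiOwners;  // Stands in for DenseSet<const Value*>.

  // Record a store of a tracked pointer into an alloca-backed slot.
  void noteStore(const std::string &Ptr, bool DestUnderlyingIsAlloca) {
    if (DestUnderlyingIsAlloca)
      MultiOwners.insert(Ptr);        // The pointer now has multiple owners.
  }

  bool hasMultipleOwners(const std::string &Ptr) const {
    return MultiOwners.count(Ptr) != 0;
  }
};
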
@@ -1908,14 +1868,14 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
if (CanUse(Inst, Ptr, PA, Class)) {
DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
<< "\n");
- assert(S.RRI.ReverseInsertPts.empty());
+ assert(!S.HasReverseInsertPts());
// If this is an invoke instruction, we're scanning it as part of
// one of its successor blocks, since we can't insert code after it
// in its own block, and we don't want to split critical edges.
if (isa<InvokeInst>(Inst))
- S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+ S.InsertReverseInsertPt(BB->getFirstInsertionPt());
else
- S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+ S.InsertReverseInsertPt(llvm::next(BasicBlock::iterator(Inst)));
S.SetSeq(S_Use);
ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
} else if (Seq == S_Release && IsUser(Class)) {
@@ -1924,12 +1884,12 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// Non-movable releases depend on any possible objc pointer use.
S.SetSeq(S_Stop);
ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop);
- assert(S.RRI.ReverseInsertPts.empty());
+ assert(!S.HasReverseInsertPts());
// As above; handle invoke specially.
if (isa<InvokeInst>(Inst))
- S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+ S.InsertReverseInsertPt(BB->getFirstInsertionPt());
else
- S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+ S.InsertReverseInsertPt(llvm::next(BasicBlock::iterator(Inst)));
}
break;
case S_Stop:
@@ -2049,8 +2009,8 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_Retain);
S.ResetSequenceProgress(S_Retain);
- S.RRI.KnownSafe = S.HasKnownPositiveRefCount();
- S.RRI.Calls.insert(Inst);
+ S.SetKnownSafe(S.HasKnownPositiveRefCount());
+ S.InsertCall(Inst);
}
S.SetKnownPositiveRefCount();
@@ -2073,12 +2033,12 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
case S_Retain:
case S_CanRelease:
if (OldSeq == S_Retain || ReleaseMetadata != 0)
- S.RRI.ReverseInsertPts.clear();
+ S.ClearReverseInsertPts();
// FALL THROUGH
case S_Use:
- S.RRI.ReleaseMetadata = ReleaseMetadata;
- S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
- Releases[Inst] = S.RRI;
+ S.SetReleaseMetadata(ReleaseMetadata);
+ S.SetTailCallRelease(cast<CallInst>(Inst)->isTailCall());
+ Releases[Inst] = S.GetRRInfo();
ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None);
S.ClearSequenceProgress();
break;
@@ -2122,8 +2082,8 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
case S_Retain:
S.SetSeq(S_CanRelease);
ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_CanRelease);
- assert(S.RRI.ReverseInsertPts.empty());
- S.RRI.ReverseInsertPts.insert(Inst);
+ assert(!S.HasReverseInsertPts());
+ S.InsertReverseInsertPt(Inst);
// One call can't cause a transition from S_Retain to S_CanRelease
// and S_CanRelease to S_Use. If we've made the first transition,
@@ -2350,8 +2310,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Instruction *InsertPt = *PI;
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- CallInst *Call =
- CallInst::Create(getRetainCallee(M), MyArg, "", InsertPt);
+ Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
Call->setDoesNotThrow();
Call->setTailCall();
@@ -2364,8 +2324,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Instruction *InsertPt = *PI;
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
- "", InsertPt);
+ Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Release);
+ CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
// Attach a clang.imprecise_release metadata tag, if appropriate.
if (MDNode *M = ReleasesToMove.ReleaseMetadata)
Call->setMetadata(ImpreciseReleaseMDKind, M);
@@ -2403,17 +2363,20 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
Module *M,
- SmallVector<Instruction *, 4> &NewRetains,
- SmallVector<Instruction *, 4> &NewReleases,
- SmallVector<Instruction *, 8> &DeadInsts,
+ SmallVectorImpl<Instruction *> &NewRetains,
+ SmallVectorImpl<Instruction *> &NewReleases,
+ SmallVectorImpl<Instruction *> &DeadInsts,
RRInfo &RetainsToMove,
RRInfo &ReleasesToMove,
Value *Arg,
bool KnownSafe,
bool &AnyPairsCompletelyEliminated) {
// If a pair happens in a region where it is known that the reference count
- // is already incremented, we can similarly ignore possible decrements.
+ // is already incremented, we can similarly ignore possible decrements unless
+ // we are dealing with a retainable object with multiple provenance sources.
bool KnownSafeTD = true, KnownSafeBU = true;
+ bool MultipleOwners = false;
+ bool CFGHazardAfflicted = false;
// Connect the dots between the top-down-collected RetainsToMove and
// bottom-up-collected ReleasesToMove to form sets of related calls.
@@ -2432,6 +2395,8 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
assert(It != Retains.end());
const RRInfo &NewRetainRRI = It->second;
KnownSafeTD &= NewRetainRRI.KnownSafe;
+ MultipleOwners =
+ MultipleOwners || MultiOwnersSet.count(GetObjCArg(NewRetain));
for (SmallPtrSet<Instruction *, 2>::const_iterator
LI = NewRetainRRI.Calls.begin(),
LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
@@ -2441,10 +2406,27 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (Jt == Releases.end())
return false;
const RRInfo &NewRetainReleaseRRI = Jt->second;
- assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+
+ // If the release does not have a reference to the retain as well,
+ // something happened which is unaccounted for. Do not do anything.
+ //
+ // This can happen if we catch an additive overflow during path count
+ // merging.
+ if (!NewRetainReleaseRRI.Calls.count(NewRetain))
+ return false;
+
if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
- OldDelta -=
- BBStates[NewRetainRelease->getParent()].GetAllPathCount();
+
+ // If we overflow when we compute the path count, don't remove/move
+ // anything.
+ const BBState &NRRBBState = BBStates[NewRetainRelease->getParent()];
+ unsigned PathCount = BBState::OverflowOccurredValue;
+ if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
+ return false;
+ assert(PathCount != BBState::OverflowOccurredValue &&
+ "PathCount at this point can not be "
+ "OverflowOccurredValue.");
+ OldDelta -= PathCount;
// Merge the ReleaseMetadata and IsTailCallRelease values.
if (FirstRelease) {
@@ -2469,8 +2451,18 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
RE = NewRetainReleaseRRI.ReverseInsertPts.end();
RI != RE; ++RI) {
Instruction *RIP = *RI;
- if (ReleasesToMove.ReverseInsertPts.insert(RIP))
- NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
+ if (ReleasesToMove.ReverseInsertPts.insert(RIP)) {
+ // If we overflow when we compute the path count, don't
+ // remove/move anything.
+ const BBState &RIPBBState = BBStates[RIP->getParent()];
+ PathCount = BBState::OverflowOccurredValue;
+ if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
+ return false;
+ assert(PathCount != BBState::OverflowOccurredValue &&
+ "PathCount at this point can not be "
+ "OverflowOccurredValue.");
+ NewDelta -= PathCount;
+ }
}
NewReleases.push_back(NewRetainRelease);
}
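
Each of the rewritten path-count reads follows the same pattern: seed the
out-parameter with the sentinel, call GetAllPathCountWithOverflow, and bail
out of the whole transformation if it reports overflow. A standalone model of
that convention, assuming (as the surrounding code suggests) that a block's
path count combines its top-down and bottom-up counts:

#include <cstdint>

static const unsigned OverflowSentinel = 0xffffffffu;

// Returns true on overflow, leaving *Out untouched; this mirrors how the
// callers above `return false` without ever consuming PathCount in that case.
bool getAllPathCountWithOverflow(unsigned TopDown, unsigned BottomUp,
                                 unsigned *Out) {
  if (TopDown == OverflowSentinel || BottomUp == OverflowSentinel)
    return true;                      // A merge already saturated.
  uint64_t Product = uint64_t(TopDown) * uint64_t(BottomUp);
  if (Product >= OverflowSentinel)
    return true;                      // The product itself overflows.
  *Out = static_cast<unsigned>(Product);
  return false;
}
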
@@ -2488,6 +2480,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
assert(It != Releases.end());
const RRInfo &NewReleaseRRI = It->second;
KnownSafeBU &= NewReleaseRRI.KnownSafe;
+ CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted;
for (SmallPtrSet<Instruction *, 2>::const_iterator
LI = NewReleaseRRI.Calls.begin(),
LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
@@ -2497,10 +2490,25 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (Jt == Retains.end())
return false;
const RRInfo &NewReleaseRetainRRI = Jt->second;
- assert(NewReleaseRetainRRI.Calls.count(NewRelease));
+
+ // If the retain does not have a reference to the release as well,
+ // something happened which is unaccounted for. Do not do anything.
+ //
+ // This can happen if we catch an additive overflow during path count
+ // merging.
+ if (!NewReleaseRetainRRI.Calls.count(NewRelease))
+ return false;
+
if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
- unsigned PathCount =
- BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
+ // If we overflow when we compute the path count, don't remove/move
+ // anything.
+ const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()];
+ unsigned PathCount = BBState::OverflowOccurredValue;
+ if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
+ return false;
+ assert(PathCount != BBState::OverflowOccurredValue &&
+ "PathCount at this point can not be "
+ "OverflowOccurredValue.");
OldDelta += PathCount;
OldCount += PathCount;
@@ -2512,7 +2520,16 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
RI != RE; ++RI) {
Instruction *RIP = *RI;
if (RetainsToMove.ReverseInsertPts.insert(RIP)) {
- PathCount = BBStates[RIP->getParent()].GetAllPathCount();
+ // If we overflow when we compute the path count, don't
+ // remove/move anything.
+ const BBState &RIPBBState = BBStates[RIP->getParent()];
+
+ PathCount = BBState::OverflowOccurredValue;
+ if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
+ return false;
+ assert(PathCount != BBState::OverflowOccurredValue &&
+ "PathCount at this point can not be "
+ "OverflowOccurredValue.");
NewDelta += PathCount;
NewCount += PathCount;
}
@@ -2525,9 +2542,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (NewRetains.empty()) break;
}
- // If the pointer is known incremented or nested, we can safely delete the
- // pair regardless of what's between them.
- if (KnownSafeTD || KnownSafeBU) {
+  // If the pointer is known incremented in one direction and we do not have
+  // MultipleOwners, we can safely remove the retain/releases. Otherwise we
+  // need to be known safe in both directions.
+ bool UnconditionallySafe = (KnownSafeTD && KnownSafeBU) ||
+ ((KnownSafeTD || KnownSafeBU) && !MultipleOwners);
+ if (UnconditionallySafe) {
RetainsToMove.ReverseInsertPts.clear();
ReleasesToMove.ReverseInsertPts.clear();
NewCount = 0;
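
The UnconditionallySafe predicate above is compact enough to mistype, so here
it is isolated as a tiny function whose truth table can be checked directly
(purely illustrative):

// Safe in both directions always wins; safe in one direction wins only when
// the object is not reachable through multiple owners.
bool unconditionallySafe(bool KnownSafeTD, bool KnownSafeBU,
                         bool MultipleOwners) {
  return (KnownSafeTD && KnownSafeBU) ||
         ((KnownSafeTD || KnownSafeBU) && !MultipleOwners);
}
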
@@ -2538,6 +2558,14 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
// less aggressive solution which is.
if (NewDelta != 0)
return false;
+
+    // At this point, we are not going to remove any RR pairs, but we are
+    // still able to move RR pairs. If one of our pointers is afflicted with
+    // CFGHazards, we cannot perform such code motion so exit early.
+ const bool WillPerformCodeMotion = RetainsToMove.ReverseInsertPts.size() ||
+ ReleasesToMove.ReverseInsertPts.size();
+ if (CFGHazardAfflicted && WillPerformCodeMotion)
+ return false;
}
// Determine whether the original call points are balanced in the retain and
@@ -2685,9 +2713,8 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
Changed = true;
// If the load has a builtin retain, insert a plain retain for it.
if (Class == IC_LoadWeakRetained) {
- CallInst *CI =
- CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
- "", Call);
+ Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
CI->setTailCall();
}
// Zap the fully redundant load.
@@ -2715,9 +2742,8 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
Changed = true;
// If the load has a builtin retain, insert a plain retain for it.
if (Class == IC_LoadWeakRetained) {
- CallInst *CI =
- CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
- "", Call);
+ Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
CI->setTailCall();
}
// Zap the fully redundant load.
@@ -2801,23 +2827,29 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
/// Identify program paths which execute sequences of retains and releases which
/// can be eliminated.
bool ObjCARCOpt::OptimizeSequences(Function &F) {
- /// Releases, Retains - These are used to store the results of the main flow
- /// analysis. These use Value* as the key instead of Instruction* so that the
- /// map stays valid when we get around to rewriting code and calls get
- /// replaced by arguments.
+ // Releases, Retains - These are used to store the results of the main flow
+ // analysis. These use Value* as the key instead of Instruction* so that the
+ // map stays valid when we get around to rewriting code and calls get
+ // replaced by arguments.
DenseMap<Value *, RRInfo> Releases;
MapVector<Value *, RRInfo> Retains;
- /// This is used during the traversal of the function to track the
- /// states for each identified object at each block.
+ // This is used during the traversal of the function to track the
+ // states for each identified object at each block.
DenseMap<const BasicBlock *, BBState> BBStates;
// Analyze the CFG of the function, and all instructions.
bool NestingDetected = Visit(F, BBStates, Retains, Releases);
// Transform.
- return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) &&
- NestingDetected;
+ bool AnyPairsCompletelyEliminated = PerformCodePlacement(BBStates, Retains,
+ Releases,
+ F.getParent());
+
+ // Cleanup.
+ MultiOwnersSet.clear();
+
+ return AnyPairsCompletelyEliminated && NestingDetected;
}
/// Check if there is a dependent call earlier that does not have anything in
@@ -3025,12 +3057,8 @@ bool ObjCARCOpt::doInitialization(Module &M) {
// they are not, because they return their argument value. And objc_release
// calls finalizers which can have arbitrary side effects.
- // These are initialized lazily.
- AutoreleaseRVCallee = 0;
- ReleaseCallee = 0;
- RetainCallee = 0;
- RetainBlockCallee = 0;
- AutoreleaseCallee = 0;
+ // Initialize our runtime entry point cache.
+ EP.Initialize(&M);
return false;
}
@@ -3050,6 +3078,12 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
PA.setAA(&getAnalysis<AliasAnalysis>());
+#ifndef NDEBUG
+ if (AreStatisticsEnabled()) {
+ GatherStatistics(F, false);
+ }
+#endif
+
// This pass performs several distinct transformations. As a compile-time aid
// when compiling code that isn't ObjC, skip these if the relevant ObjC
// library functions aren't declared.
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
index 03e12d4..53c077e 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
@@ -1,4 +1,4 @@
-//===- ObjCARCUtil.cpp - ObjC ARC Optimization --------*- mode: c++ -*-----===//
+//===- ObjCARCUtil.cpp - ObjC ARC Optimization ----------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -112,6 +112,8 @@ InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) {
.Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
.Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
.Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
+ .Case("objc_sync_enter", IC_User)
+ .Case("objc_sync_exit", IC_User)
.Default(IC_CallOrUser);
// Argument is i8**
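
Classifying objc_sync_enter and objc_sync_exit as IC_User tells the optimizer
that these calls use their pointer argument without otherwise being
interesting to ARC, rather than letting them fall through to the catch-all
IC_CallOrUser default. A dependency-free sketch of the same classification:

#include <cstring>

enum InstructionClassModel { ModelUser, ModelCallOrUser };

InstructionClassModel classify(const char *Name) {
  // The synchronization entry points take an object pointer but, as far as
  // ARC is concerned, only use it; they neither retain nor release it.
  if (std::strcmp(Name, "objc_sync_enter") == 0 ||
      std::strcmp(Name, "objc_sync_exit") == 0)
    return ModelUser;
  return ModelCallOrUser;
}
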
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index ec449fd8e..a13fb9e 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -1,4 +1,4 @@
-//===- ProvenanceAnalysis.h - ObjC ARC Optimization ---*- mode: c++ -*-----===//
+//===- ProvenanceAnalysis.h - ObjC ARC Optimization ---*- C++ -*-----------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index a097308..a3eb07a9 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -83,7 +83,7 @@ bool ADCE::runOnFunction(Function& F) {
I->dropAllReferences();
}
- for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(),
+ for (SmallVectorImpl<Instruction *>::iterator I = worklist.begin(),
E = worklist.end(); I != E; ++I) {
++NumRemoved;
(*I)->eraseFromParent();
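
Taking SmallVectorImpl<T> (or its iterator) instead of SmallVector<T, N> is
the standard LLVM idiom for decoupling an interface from the caller's inline
capacity N; the ADCE and ConnectTDBUTraversals changes in this patch are both
instances of it. A minimal model of the base/derived split, with made-up
names:

#include <cstddef>

template <typename T> struct SmallVecBase {     // Plays SmallVectorImpl<T>.
  T *Begin = nullptr;
  T *End = nullptr;
  T *begin() { return Begin; }
  T *end() { return End; }
};

template <typename T, std::size_t N>
struct SmallVec : SmallVecBase<T> {             // Plays SmallVector<T, N>.
  T Inline[N];
  SmallVec() { this->Begin = this->End = Inline; }
};

// Callers traverse through the base type, so the signature and the iterator
// type never depend on which inline size N the caller chose.
template <typename T> std::size_t countAndClear(SmallVecBase<T> &V) {
  std::size_t Count = 0;
  for (T *I = V.begin(), *E = V.end(); I != E; ++I)
    ++Count;
  V.End = V.Begin;
  return Count;
}
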
diff --git a/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
deleted file mode 100644
index e755008..0000000
--- a/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-//===-- BasicBlockPlacement.cpp - Basic Block Code Layout optimization ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a very simple profile guided basic block placement
-// algorithm. The idea is to put frequently executed blocks together at the
-// start of the function, and hopefully increase the number of fall-through
-// conditional branches. If there is no profile information for a particular
-// function, this pass basically orders blocks in depth-first order
-//
-// The algorithm implemented here is basically "Algo1" from "Profile Guided Code
-// Positioning" by Pettis and Hansen, except that it uses basic block counts
-// instead of edge counts. This should be improved in many ways, but is very
-// simple for now.
-//
-// Basically we "place" the entry block, then loop over all successors in a DFO,
-// placing the most frequently executed successor until we run out of blocks. I
-// told you this was _extremely_ simplistic. :) This is also much slower than it
-// could be. When it becomes important, this pass will be rewritten to use a
-// better algorithm, and then we can worry about efficiency.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "block-placement"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include <set>
-using namespace llvm;
-
-STATISTIC(NumMoved, "Number of basic blocks moved");
-
-namespace {
- struct BlockPlacement : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- BlockPlacement() : FunctionPass(ID) {
- initializeBlockPlacementPass(*PassRegistry::getPassRegistry());
- }
-
- virtual bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<ProfileInfo>();
- //AU.addPreserved<ProfileInfo>(); // Does this work?
- }
- private:
- /// PI - The profile information that is guiding us.
- ///
- ProfileInfo *PI;
-
- /// NumMovedBlocks - Every time we move a block, increment this counter.
- ///
- unsigned NumMovedBlocks;
-
- /// PlacedBlocks - Every time we place a block, remember it so we don't get
- /// into infinite loops.
- std::set<BasicBlock*> PlacedBlocks;
-
- /// InsertPos - This an iterator to the next place we want to insert a
- /// block.
- Function::iterator InsertPos;
-
- /// PlaceBlocks - Recursively place the specified blocks and any unplaced
- /// successors.
- void PlaceBlocks(BasicBlock *BB);
- };
-}
-
-char BlockPlacement::ID = 0;
-INITIALIZE_PASS_BEGIN(BlockPlacement, "block-placement",
- "Profile Guided Basic Block Placement", false, false)
-INITIALIZE_AG_DEPENDENCY(ProfileInfo)
-INITIALIZE_PASS_END(BlockPlacement, "block-placement",
- "Profile Guided Basic Block Placement", false, false)
-
-FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
-
-bool BlockPlacement::runOnFunction(Function &F) {
- PI = &getAnalysis<ProfileInfo>();
-
- NumMovedBlocks = 0;
- InsertPos = F.begin();
-
- // Recursively place all blocks.
- PlaceBlocks(F.begin());
-
- PlacedBlocks.clear();
- NumMoved += NumMovedBlocks;
- return NumMovedBlocks != 0;
-}
-
-
-/// PlaceBlocks - Recursively place the specified blocks and any unplaced
-/// successors.
-void BlockPlacement::PlaceBlocks(BasicBlock *BB) {
- assert(!PlacedBlocks.count(BB) && "Already placed this block!");
- PlacedBlocks.insert(BB);
-
- // Place the specified block.
- if (&*InsertPos != BB) {
- // Use splice to move the block into the right place. This avoids having to
- // remove the block from the function then readd it, which causes a bunch of
- // symbol table traffic that is entirely pointless.
- Function::BasicBlockListType &Blocks = BB->getParent()->getBasicBlockList();
- Blocks.splice(InsertPos, Blocks, BB);
-
- ++NumMovedBlocks;
- } else {
- // This block is already in the right place, we don't have to do anything.
- ++InsertPos;
- }
-
- // Keep placing successors until we run out of ones to place. Note that this
- // loop is very inefficient (N^2) for blocks with many successors, like switch
- // statements. FIXME!
- while (1) {
- // Okay, now place any unplaced successors.
- succ_iterator SI = succ_begin(BB), E = succ_end(BB);
-
- // Scan for the first unplaced successor.
- for (; SI != E && PlacedBlocks.count(*SI); ++SI)
- /*empty*/;
- if (SI == E) return; // No more successors to place.
-
- double MaxExecutionCount = PI->getExecutionCount(*SI);
- BasicBlock *MaxSuccessor = *SI;
-
- // Scan for more frequently executed successors
- for (; SI != E; ++SI)
- if (!PlacedBlocks.count(*SI)) {
- double Count = PI->getExecutionCount(*SI);
- if (Count > MaxExecutionCount ||
- // Prefer to not disturb the code.
- (Count == MaxExecutionCount && *SI == &*InsertPos)) {
- MaxExecutionCount = Count;
- MaxSuccessor = *SI;
- }
- }
-
- // Now that we picked the maximally executed successor, place it.
- PlaceBlocks(MaxSuccessor);
- }
-}
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
index f0d29c8..007e9b7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -76,10 +75,10 @@ namespace {
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
+ const TargetMachine *TM;
const TargetLowering *TLI;
const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
- ProfileInfo *PFI;
/// CurInstIterator - As we scan instructions optimizing them, this is the
/// next instruction to optimize. Xforms that can invalidate this should
@@ -100,8 +99,8 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- explicit CodeGenPrepare(const TargetLowering *tli = 0)
- : FunctionPass(ID), TLI(tli) {
+ explicit CodeGenPrepare(const TargetMachine *TM = 0)
+ : FunctionPass(ID), TM(TM), TLI(0) {
initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F);
@@ -110,7 +109,6 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
- AU.addPreserved<ProfileInfo>();
AU.addRequired<TargetLibraryInfo>();
}
@@ -139,17 +137,17 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
"Optimize for code generation", false, false)
-FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
- return new CodeGenPrepare(TLI);
+FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
+ return new CodeGenPrepare(TM);
}
bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
ModifiedDT = false;
+ if (TM) TLI = TM->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
- PFI = getAnalysisIfAvailable<ProfileInfo>();
OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize);
@@ -205,7 +203,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
DeleteDeadBlock(BB);
-
+
for (SmallVectorImpl<BasicBlock*>::iterator
II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
if (pred_begin(*II) == pred_end(*II))
@@ -440,10 +438,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
DT->changeImmediateDominator(DestBB, NewIDom);
DT->eraseNode(BB);
}
- if (PFI) {
- PFI->replaceAllUses(BB, DestBB);
- PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
- }
BB->eraseFromParent();
++NumBlocksElim;
@@ -830,7 +824,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
void print(raw_ostream &OS) const;
void dump() const;
-
+
bool operator==(const ExtAddrMode& O) const {
return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
(BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
@@ -838,10 +832,12 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
}
};
+#ifndef NDEBUG
static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
AM.print(OS);
return OS;
}
+#endif
void ExtAddrMode::print(raw_ostream &OS) const {
bool NeedPlus = false;
@@ -866,7 +862,6 @@ void ExtAddrMode::print(raw_ostream &OS) const {
OS << (NeedPlus ? " + " : "")
<< Scale << "*";
WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
- NeedPlus = true;
}
OS << ']';
@@ -891,16 +886,16 @@ class AddressingModeMatcher {
/// the memory instruction that we're computing this address for.
Type *AccessTy;
Instruction *MemoryInst;
-
+
/// AddrMode - This is the addressing mode that we're building up. This is
/// part of the return value of this addressing mode matching stuff.
ExtAddrMode &AddrMode;
-
+
/// IgnoreProfitability - This is set to true when we should not do
/// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
/// always returns true.
bool IgnoreProfitability;
-
+
AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
const TargetLowering &T, Type *AT,
Instruction *MI, ExtAddrMode &AM)
@@ -908,7 +903,7 @@ class AddressingModeMatcher {
IgnoreProfitability = false;
}
public:
-
+
/// Match - Find the maximal addressing mode that a load/store of V can fold,
/// give an access type of AccessTy. This returns a list of involved
/// instructions in AddrModeInsts.
@@ -918,7 +913,7 @@ public:
const TargetLowering &TLI) {
ExtAddrMode Result;
- bool Success =
+ bool Success =
AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
MemoryInst, Result).MatchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -943,11 +938,11 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
// mode. Just process that directly.
if (Scale == 1)
return MatchAddr(ScaleReg, Depth);
-
+
// If the scale is 0, it takes nothing to add this.
if (Scale == 0)
return true;
-
+
// If we already have a scale of this value, we can add to it, otherwise, we
// need an available scale field.
if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
@@ -966,7 +961,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
// It was legal, so commit it.
AddrMode = TestAddrMode;
-
+
// Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
// to see if ScaleReg is actually X+C. If so, we can turn this into adding
// X*Scale + C*Scale to addr mode.
@@ -975,7 +970,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
TestAddrMode.ScaledReg = AddLHS;
TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
-
+
// If this addressing mode is legal, commit it and remember that we folded
// this instruction.
if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
@@ -1026,7 +1021,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
unsigned Depth) {
// Avoid exponential behavior on extremely deep expression trees.
if (Depth >= 5) return false;
-
+
switch (Opcode) {
case Instruction::PtrToInt:
// PtrToInt is always a noop, as we know that the int type is pointer sized.
@@ -1034,7 +1029,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
case Instruction::IntToPtr:
// This inttoptr is a no-op if the integer type is pointer sized.
if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
- TLI.getPointerTy())
+ TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
return MatchAddr(AddrInst->getOperand(0), Depth);
return false;
case Instruction::BitCast:
@@ -1055,16 +1050,16 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
MatchAddr(AddrInst->getOperand(0), Depth+1))
return true;
-
+
// Restore the old addr mode info.
AddrMode = BackupAddrMode;
AddrModeInsts.resize(OldSize);
-
+
// Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
MatchAddr(AddrInst->getOperand(1), Depth+1))
return true;
-
+
// Otherwise we definitely can't merge the ADD in.
AddrMode = BackupAddrMode;
AddrModeInsts.resize(OldSize);
@@ -1081,7 +1076,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
int64_t Scale = RHS->getSExtValue();
if (Opcode == Instruction::Shl)
Scale = 1LL << Scale;
-
+
return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
}
case Instruction::GetElementPtr: {
@@ -1089,7 +1084,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
// one variable offset.
int VariableOperand = -1;
unsigned VariableScale = 0;
-
+
int64_t ConstantOffset = 0;
const DataLayout *TD = TLI.getDataLayout();
gep_type_iterator GTI = gep_type_begin(AddrInst);
@@ -1107,14 +1102,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
// We only allow one variable index at the moment.
if (VariableOperand != -1)
return false;
-
+
// Remember the variable index.
VariableOperand = i;
VariableScale = TypeSize;
}
}
}
-
+
// A common case is for the GEP to only do a constant offset. In this case,
// just add it to the disp field and check validity.
if (VariableOperand == -1) {
@@ -1208,7 +1203,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
AddrModeInsts.push_back(I);
return true;
}
-
+
// It isn't profitable to do this, roll back.
//cerr << "NOT FOLDING: " << *I;
AddrMode = BackupAddrMode;
@@ -1254,7 +1249,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
+
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, SDValue());
@@ -1279,7 +1274,7 @@ static bool FindAllMemoryUses(Instruction *I,
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I))
return false;
-
+
// If this is an obviously unfoldable instruction, bail out.
if (!MightBeFoldableInst(I))
return true;
@@ -1293,24 +1288,24 @@ static bool FindAllMemoryUses(Instruction *I,
MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
continue;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
unsigned opNo = UI.getOperandNo();
if (opNo == 0) return true; // Storing addr, not into addr.
MemoryUses.push_back(std::make_pair(SI, opNo));
continue;
}
-
+
if (CallInst *CI = dyn_cast<CallInst>(U)) {
InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
if (!IA) return true;
-
+
// If this is a memory operand, we're cool, otherwise bail out.
if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
return true;
continue;
}
-
+
if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
TLI))
return true;
@@ -1328,17 +1323,17 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
// If Val is either of the known-live values, we know it is live!
if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
return true;
-
+
// All values other than instructions and arguments (e.g. constants) are live.
if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
-
+
// If Val is a constant sized alloca in the entry block, it is live, this is
// true because it is just a reference to the stack/frame pointer, which is
// live for the whole function.
if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
if (AI->isStaticAlloca())
return true;
-
+
// Check to see if this value is already used in the memory instruction's
// block. If so, it's already live into the block at the very least, so we
// can reasonably fold it.
@@ -1370,7 +1365,7 @@ bool AddressingModeMatcher::
IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter) {
if (IgnoreProfitability) return true;
-
+
// AMBefore is the addressing mode before this instruction was folded into it,
// and AMAfter is the addressing mode after the instruction was folded. Get
// the set of registers referenced by AMAfter and subtract out those
@@ -1381,7 +1376,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// BaseReg and ScaleReg (global addresses are always available, as are any
// folded immediates).
Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
-
+
// If the BaseReg or ScaledReg was referenced by the previous addrmode, their
// lifetime wasn't extended by adding this instruction.
if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
@@ -1402,7 +1397,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
SmallPtrSet<Instruction*, 16> ConsideredInsts;
if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
return false; // Has a non-memory, non-foldable use!
-
+
// Now that we know that all uses of this instruction are part of a chain of
// computation involving only operations that could theoretically be folded
// into a memory use, loop over each of these uses and see if they could
@@ -1411,15 +1406,14 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
Instruction *User = MemoryUses[i].first;
unsigned OpNo = MemoryUses[i].second;
-
+
// Get the access type of this use. If the use isn't a pointer, we don't
// know what it accesses.
Value *Address = User->getOperand(OpNo);
if (!Address->getType()->isPointerTy())
return false;
- Type *AddressAccessTy =
- cast<PointerType>(Address->getType())->getElementType();
-
+ Type *AddressAccessTy = Address->getType()->getPointerElementType();
+
// Do a match against the root of this address, ignoring profitability. This
// will tell us if the addressing mode for the memory operation will
// *actually* cover the shared instruction.
@@ -1434,10 +1428,10 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
I) == MatchedAddrModeInsts.end())
return false;
-
+
MatchedAddrModeInsts.clear();
}
-
+
return true;
}
@@ -1572,9 +1566,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
- Type *IntPtrTy =
- TLI->getDataLayout()->getIntPtrType(AccessTy->getContext());
-
+ Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
Value *Result = 0;
// Start with the base register. Do this first so that subsequent address
@@ -1893,7 +1885,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P)) {
+ if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
+ TLInfo, DT)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 3c08634..5266894 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -72,11 +72,6 @@ namespace {
}
namespace llvm {
-// SimpleValue is POD.
-template<> struct isPodLike<SimpleValue> {
- static const bool value = true;
-};
-
template<> struct DenseMapInfo<SimpleValue> {
static inline SimpleValue getEmptyKey() {
return DenseMapInfo<Instruction*>::getEmptyKey();
@@ -220,11 +215,6 @@ namespace {
}
namespace llvm {
- // CallValue is POD.
- template<> struct isPodLike<CallValue> {
- static const bool value = true;
- };
-
template<> struct DenseMapInfo<CallValue> {
static inline CallValue getEmptyKey() {
return DenseMapInfo<Instruction*>::getEmptyKey();
diff --git a/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
new file mode 100644
index 0000000..e7de07f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -0,0 +1,79 @@
+//===- FlattenCFGPass.cpp - CFG Flatten Pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements flattening of the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "flattencfg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+namespace {
+struct FlattenCFGPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+public:
+ FlattenCFGPass() : FunctionPass(ID) {
+ initializeFlattenCFGPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ }
+
+private:
+ AliasAnalysis *AA;
+};
+}
+
+char FlattenCFGPass::ID = 0;
+INITIALIZE_PASS_BEGIN(FlattenCFGPass, "flattencfg", "Flatten the CFG", false,
+ false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(FlattenCFGPass, "flattencfg", "Flatten the CFG", false,
+ false)
+
+// Public interface to the FlattenCFG pass
+FunctionPass *llvm::createFlattenCFGPass() { return new FlattenCFGPass(); }
+
+/// iterativelyFlattenCFG - Call FlattenCFG on all the blocks in the function,
+/// iterating until no more changes are made.
+static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
+ bool Changed = false;
+ bool LocalChange = true;
+ while (LocalChange) {
+ LocalChange = false;
+
+    // Loop over all of the basic blocks, attempting to flatten each one.
+    //
+ for (Function::iterator BBIt = F.begin(); BBIt != F.end();) {
+ if (FlattenCFG(BBIt++, AA)) {
+ LocalChange = true;
+ }
+ }
+ Changed |= LocalChange;
+ }
+ return Changed;
+}
+
+bool FlattenCFGPass::runOnFunction(Function &F) {
+ AA = &getAnalysis<AliasAnalysis>();
+ bool EverChanged = false;
+ // iterativelyFlattenCFG can make some blocks dead.
+ while (iterativelyFlattenCFG(F, AA)) {
+ removeUnreachableBlocks(F);
+ EverChanged = true;
+ }
+ return EverChanged;
+}
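
runOnFunction above is a two-level fixpoint: the inner loop sweeps every block
once, and the outer loop repeats whole sweeps (cleaning up the blocks that
flattening strands) until nothing changes. The control shape, reduced to
stand-in hooks:

static int SweepsWithChanges = 3;        // Stand-in: pretend three sweeps of
                                         // the function make progress.
static bool flattenOnce() { return SweepsWithChanges-- > 0; }
static void removeDeadBlocks() {}        // Stand-in for the real cleanup.

bool runToFixpoint() {
  bool EverChanged = false;
  while (flattenOnce()) {                // Repeat until a sweep is a no-op.
    removeDeadBlocks();                  // Flattening can orphan blocks.
    EverChanged = true;
  }
  return EverChanged;
}

int main() { return runToFixpoint() ? 0 : 1; }
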
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index f350b9b..6af269d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -21,8 +21,10 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -45,6 +47,7 @@
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <vector>
using namespace llvm;
using namespace PatternMatch;
@@ -505,7 +508,9 @@ namespace {
enum ValType {
SimpleVal, // A simple offsetted value that is accessed.
LoadVal, // A value produced by a load.
- MemIntrin // A memory intrinsic which is loaded from.
+ MemIntrin, // A memory intrinsic which is loaded from.
+    UndefVal     // An UndefValue representing a value from a dead block
+                 // (which is not yet physically removed from the CFG).
};
/// V - The value that is live out of the block.
@@ -543,10 +548,20 @@ namespace {
Res.Offset = Offset;
return Res;
}
-
+
+ static AvailableValueInBlock getUndef(BasicBlock *BB) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(0);
+ Res.Val.setInt(UndefVal);
+ Res.Offset = 0;
+ return Res;
+ }
+
bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+ bool isUndefValue() const { return Val.getInt() == UndefVal; }
Value *getSimpleValue() const {
assert(isSimpleValue() && "Wrong accessor");
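
getUndef above reuses the existing pointer-plus-kind encoding: a null pointer
tagged UndefVal, distinguished purely by the tag. A compact model of that
tagged representation (Avail and its members are illustrative, not GVN's
types):

#include <cassert>
#include <cstdint>

enum ValKind : std::uint8_t { SimpleK, LoadK, MemIntrinK, UndefK };

struct Avail {                      // Models AvailableValueInBlock's Val pair.
  void *Ptr = nullptr;
  ValKind Kind = SimpleK;
  unsigned Offset = 0;

  static Avail getUndef() {         // Dead block: no value, only the tag.
    Avail A;
    A.Ptr = nullptr;
    A.Kind = UndefK;
    A.Offset = 0;
    return A;
  }
  bool isUndefValue() const { return Kind == UndefK; }
};

int main() {
  Avail A = Avail::getUndef();
  assert(A.isUndefValue() && A.Ptr == nullptr);
  return 0;
}
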
@@ -574,6 +589,7 @@ namespace {
DominatorTree *DT;
const DataLayout *TD;
const TargetLibraryInfo *TLI;
+ SetVector<BasicBlock *> DeadBlocks;
ValueTable VN;
@@ -692,9 +708,13 @@ namespace {
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
+ BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
const BasicBlockEdge &Root);
bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
+ bool processFoldableCondBr(BranchInst *BI);
+ void addDeadBlock(BasicBlock *BB);
+ void assignValNumForDeadCode();
};
char GVN::ID = 0;
@@ -1068,14 +1088,15 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
if (Offset == -1)
return Offset;
+ unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
- llvm::Type::getInt8PtrTy(Src->getContext()));
+ Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
- Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
if (ConstantFoldLoadFromConstPtr(Src, &TD))
return Offset;
return -1;
@@ -1152,7 +1173,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
Type *DestPTy =
IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
DestPTy = PointerType::get(DestPTy,
- cast<PointerType>(PtrVal->getType())->getAddressSpace());
+ PtrVal->getType()->getPointerAddressSpace());
Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
@@ -1227,15 +1248,16 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
// Otherwise, this is a memcpy/memmove from a constant global.
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
+ unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
- llvm::Type::getInt8PtrTy(Src->getContext()));
+ Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
- Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
@@ -1250,8 +1272,10 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
// just use the dominating value directly.
if (ValuesPerBlock.size() == 1 &&
gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
- LI->getParent()))
+ LI->getParent())) {
+    assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominates this block");
return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
+ }
// Otherwise, we have to construct SSA form.
SmallVector<PHINode*, 8> NewPHIs;
@@ -1321,7 +1345,7 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
<< *getCoercedLoadValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
- } else {
+ } else if (isMemIntrinValue()) {
const DataLayout *TD = gvn.getDataLayout();
assert(TD && "Need target data to handle type mismatch case");
Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
@@ -1329,6 +1353,10 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
+ } else {
+ assert(isUndefValue() && "Should be UndefVal");
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";);
+ return UndefValue::get(LoadTy);
}
return Res;
}
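
The AvailableValueInBlock changes above extend a small tagged-pointer
encoding: the kind (SimpleVal/LoadVal/MemIntrin/UndefVal) lives in the low
bits of a PointerIntPair, and getUndef() stores a null pointer with the
UndefVal tag. A minimal self-contained sketch of that encoding idea, using
plain C++ and hypothetical names rather than llvm::PointerIntPair:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    enum ValKind { SimpleVal, LoadVal, MemIntrin, UndefVal };

    // Pack a pointer and a 2-bit tag into one word, relying on the pointee's
    // alignment to keep the low bits free (the idea behind PointerIntPair).
    struct TaggedPtr {
      uintptr_t Bits = 0;
      void set(void *P, ValKind K) {
        uintptr_t Raw = reinterpret_cast<uintptr_t>(P);
        assert((Raw & 3) == 0 && "pointer must be at least 4-byte aligned");
        Bits = Raw | static_cast<uintptr_t>(K);
      }
      void *getPointer() const {
        return reinterpret_cast<void *>(Bits & ~uintptr_t(3));
      }
      ValKind getInt() const { return static_cast<ValKind>(Bits & 3); }
    };

    int main() {
      static int Dummy;          // stand-in for a Value*
      TaggedPtr V;
      V.set(&Dummy, MemIntrin);
      std::printf("kind=%d ptr-ok=%d\n", V.getInt(), V.getPointer() == &Dummy);
      V.set(nullptr, UndefVal);  // UndefVal carries no pointer, as in getUndef()
      std::printf("kind=%d\n", V.getInt());
      return 0;
    }
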
@@ -1352,6 +1380,13 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
+ if (DeadBlocks.count(DepBB)) {
+      // A dead dependent mem-op disguises itself as a load evaluating the
+      // same value as the load in question.
+ ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
+ continue;
+ }
+
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
UnavailableBlocks.push_back(DepBB);
continue;
@@ -1513,7 +1548,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
FullyAvailableBlocks[UnavailableBlocks[i]] = false;
- SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
+ SmallVector<BasicBlock *, 4> CriticalEdgePred;
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
PI != E; ++PI) {
BasicBlock *Pred = *PI;
@@ -1536,20 +1571,14 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
}
- unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
- NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
+ CriticalEdgePred.push_back(Pred);
}
}
- if (!NeedToSplit.empty()) {
- toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
- return false;
- }
-
// Decide whether PRE is profitable for this load.
unsigned NumUnavailablePreds = PredLoads.size();
assert(NumUnavailablePreds != 0 &&
- "Fully available value should be eliminated above!");
+ "Fully available value should already be eliminated!");
// If this load is unavailable in multiple predecessors, reject it.
// FIXME: If we could restructure the CFG, we could make a common pred with
@@ -1558,6 +1587,17 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (NumUnavailablePreds != 1)
return false;
+ // Split critical edges, and update the unavailable predecessors accordingly.
+ for (SmallVectorImpl<BasicBlock *>::iterator I = CriticalEdgePred.begin(),
+ E = CriticalEdgePred.end(); I != E; I++) {
+ BasicBlock *OrigPred = *I;
+ BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
+ PredLoads.erase(OrigPred);
+ PredLoads[NewPred] = 0;
+ DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
+ << LoadBB->getName() << '\n');
+ }
+
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
SmallVector<Instruction*, 8> NewInsts;
@@ -1594,7 +1634,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (MD) MD->removeInstruction(I);
I->eraseFromParent();
}
- return false;
+    // HINT: Don't revert the edge-splitting, as the following transformation
+    // may also need to split these critical edges.
+ return !CriticalEdgePred.empty();
}
// Okay, we can eliminate this load by inserting a reload in the predecessor
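
The rewritten PerformLoadPRE above splits critical edges eagerly instead of
queueing them in toSplit and bailing out. For reference, an edge is critical
when its source has multiple successors and its destination has multiple
predecessors, so nothing can be inserted "on" the edge without a new block.
A small self-contained sketch over a toy adjacency-list CFG (hypothetical
names, not LLVM API):

    #include <cstdio>
    #include <vector>

    using CFG = std::vector<std::vector<int>>;  // Succs[b] = successors of b

    // Src->Dst is critical iff Src has >1 successor and Dst has >1
    // predecessor; splitting inserts Src->New->Dst, giving PRE a spot for
    // the hoisted load that other paths never execute.
    static bool isCriticalEdge(const CFG &Succs, int Src, int Dst) {
      if (Succs[Src].size() < 2)
        return false;
      int NumPreds = 0;
      for (int B = 0; B != (int)Succs.size(); ++B)
        for (int S : Succs[B])
          if (S == Dst)
            ++NumPreds;
      return NumPreds > 1;
    }

    int main() {
      CFG G = {{1, 3}, {3}, {}, {}};  // 0->{1,3}, 1->{3}
      std::printf("0->3 critical: %d\n", isCriticalEdge(G, 0, 3));  // 1
      std::printf("0->1 critical: %d\n", isCriticalEdge(G, 0, 1));  // 0
      return 0;
    }
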
@@ -2181,11 +2223,13 @@ bool GVN::processInstruction(Instruction *I) {
// For conditional branches, we can perform simple conditional propagation on
// the condition value itself.
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+ if (!BI->isConditional())
return false;
- Value *BranchCond = BI->getCondition();
+ if (isa<Constant>(BI->getCondition()))
+ return processFoldableCondBr(BI);
+ Value *BranchCond = BI->getCondition();
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
// Avoid multiple edges early.
@@ -2297,25 +2341,30 @@ bool GVN::runOnFunction(Function& F) {
while (ShouldContinue) {
DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
ShouldContinue = iterateOnFunction(F);
- if (splitCriticalEdges())
- ShouldContinue = true;
Changed |= ShouldContinue;
++Iteration;
}
if (EnablePRE) {
+    // Fabricate val-nums for dead code in order to suppress the assertion
+    // in performPRE().
+ assignValNumForDeadCode();
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
Changed |= PREChanged;
}
}
+
// FIXME: Should perform GVN again after PRE does something. PRE can move
// computations into blocks where they become fully redundant. Note that
// we can't do this until PRE's critical edge splitting updates memdep.
// Actually, when this happens, we should just fully integrate PRE into GVN.
cleanupGlobalSets();
+  // Do not clean up DeadBlocks in cleanupGlobalSets(), as it is called for
+  // each iteration.
+ DeadBlocks.clear();
return Changed;
}
@@ -2326,6 +2375,9 @@ bool GVN::processBlock(BasicBlock *BB) {
// (and incrementing BI before processing an instruction).
assert(InstrsToErase.empty() &&
"We expect InstrsToErase to be empty across iterations");
+ if (DeadBlocks.count(BB))
+ return false;
+
bool ChangedFunction = false;
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
@@ -2344,7 +2396,7 @@ bool GVN::processBlock(BasicBlock *BB) {
if (!AtStart)
--BI;
- for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(),
+ for (SmallVectorImpl<Instruction *>::iterator I = InstrsToErase.begin(),
E = InstrsToErase.end(); I != E; ++I) {
DEBUG(dbgs() << "GVN removed: " << **I << '\n');
if (MD) MD->removeInstruction(*I);
@@ -2543,6 +2595,15 @@ bool GVN::performPRE(Function &F) {
return Changed;
}
+/// Split the critical edge connecting the given two blocks, and return
+/// the block inserted on the critical edge.
+BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
+ BasicBlock *BB = SplitCriticalEdge(Pred, Succ, this);
+ if (MD)
+ MD->invalidateCachedPredecessors();
+ return BB;
+}
+
/// splitCriticalEdges - Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
@@ -2569,9 +2630,18 @@ bool GVN::iterateOnFunction(Function &F) {
RE = RPOT.end(); RI != RE; ++RI)
Changed |= processBlock(*RI);
#else
+  // Save the blocks this function has before the transformation begins. GVN
+  // may split critical edges, which would invalidate the RPO/DT iterators.
+  //
+ std::vector<BasicBlock *> BBVect;
+ BBVect.reserve(256);
for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
DE = df_end(DT->getRootNode()); DI != DE; ++DI)
- Changed |= processBlock(DI->getBlock());
+ BBVect.push_back(DI->getBlock());
+
+ for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
+ I != E; I++)
+ Changed |= processBlock(*I);
#endif
return Changed;
@@ -2601,3 +2671,133 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
}
}
}
+
+// BB is declared dead, which implies that other blocks become dead as well.
+// This function adds all such blocks to "DeadBlocks". For the dead blocks'
+// live successors, it updates their phi nodes by replacing the operands
+// corresponding to dead blocks with UndefVal.
+//
+void GVN::addDeadBlock(BasicBlock *BB) {
+ SmallVector<BasicBlock *, 4> NewDead;
+ SmallSetVector<BasicBlock *, 4> DF;
+
+ NewDead.push_back(BB);
+ while (!NewDead.empty()) {
+ BasicBlock *D = NewDead.pop_back_val();
+ if (DeadBlocks.count(D))
+ continue;
+
+ // All blocks dominated by D are dead.
+ SmallVector<BasicBlock *, 8> Dom;
+ DT->getDescendants(D, Dom);
+ DeadBlocks.insert(Dom.begin(), Dom.end());
+
+ // Figure out the dominance-frontier(D).
+ for (SmallVectorImpl<BasicBlock *>::iterator I = Dom.begin(),
+ E = Dom.end(); I != E; I++) {
+ BasicBlock *B = *I;
+ for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) {
+ BasicBlock *S = *SI;
+ if (DeadBlocks.count(S))
+ continue;
+
+ bool AllPredDead = true;
+ for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++)
+ if (!DeadBlocks.count(*PI)) {
+ AllPredDead = false;
+ break;
+ }
+
+ if (!AllPredDead) {
+          // S could be proven dead later on. That is why we don't update the
+          // phi operands at this moment.
+ DF.insert(S);
+ } else {
+          // Although S is not dominated by D, it is dead now. This can
+          // happen if S already had a dead predecessor before D was
+          // declared dead.
+ NewDead.push_back(S);
+ }
+ }
+ }
+ }
+
+ // For the dead blocks' live successors, update their phi nodes by replacing
+ // the operands corresponding to dead blocks with UndefVal.
+ for(SmallSetVector<BasicBlock *, 4>::iterator I = DF.begin(), E = DF.end();
+ I != E; I++) {
+ BasicBlock *B = *I;
+ if (DeadBlocks.count(B))
+ continue;
+
+ SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
+ for (SmallVectorImpl<BasicBlock *>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; PI++) {
+ BasicBlock *P = *PI;
+
+ if (!DeadBlocks.count(P))
+ continue;
+
+ if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) {
+ if (BasicBlock *S = splitCriticalEdges(P, B))
+ DeadBlocks.insert(P = S);
+ }
+
+ for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) {
+ PHINode &Phi = cast<PHINode>(*II);
+ Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
+ UndefValue::get(Phi.getType()));
+ }
+ }
+ }
+}
+
+// If the given branch is recognized as a foldable branch (i.e. a conditional
+// branch with a constant condition), perform the following analyses and
+// transformation:
+// 1) If the dead outgoing edge is a critical edge, split it. Let R be the
+//    target of the dead outgoing edge.
+// 2) Identify the set of dead blocks implied by the branch's dead outgoing
+//    edge. The result of this step is {X | X is dominated by R}.
+// 3) Identify those blocks which have at least one dead predecessor. The
+//    result of this step is dominance-frontier(R).
+// 4) Update the PHIs in DF(R) by replacing the operands corresponding to
+//    dead blocks with "UndefVal", in the hope that these PHIs will be
+//    optimized away.
+//
+// Returns true iff *new* dead code is found.
+bool GVN::processFoldableCondBr(BranchInst *BI) {
+ if (!BI || BI->isUnconditional())
+ return false;
+
+ ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+ if (!Cond)
+ return false;
+
+ BasicBlock *DeadRoot = Cond->getZExtValue() ?
+ BI->getSuccessor(1) : BI->getSuccessor(0);
+ if (DeadBlocks.count(DeadRoot))
+ return false;
+
+ if (!DeadRoot->getSinglePredecessor())
+ DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);
+
+ addDeadBlock(DeadRoot);
+ return true;
+}
+
+// performPRE() will trigger an assertion if it comes across an instruction
+// without an associated val-num. As a function normally has far more live
+// instructions than dead ones, it makes more sense just to "fabricate" a
+// val-num for the dead code than to check whether each instruction is dead.
+void GVN::assignValNumForDeadCode() {
+ for (SetVector<BasicBlock *>::iterator I = DeadBlocks.begin(),
+ E = DeadBlocks.end(); I != E; I++) {
+ BasicBlock *BB = *I;
+ for (BasicBlock::iterator II = BB->begin(), EE = BB->end();
+ II != EE; II++) {
+ Instruction *Inst = &*II;
+ unsigned ValNum = VN.lookup_or_add(Inst);
+ addToLeaderTable(ValNum, Inst, BB);
+ }
+ }
+}
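
Taken together, processFoldableCondBr() and addDeadBlock() mark everything
dominated by the dead edge's target as dead, and then keep killing blocks
whose predecessors have all died. The fixed-point part of that computation
can be sketched self-containedly over a toy CFG (hypothetical names; the
real pass gets the first step from the dominator tree via getDescendants()):

    #include <cstdio>
    #include <set>
    #include <vector>

    using CFG = std::vector<std::vector<int>>;  // Succs per block

    // Starting from Root, a live block joins the dead set once every one of
    // its predecessors is dead (cf. the AllPredDead test in addDeadBlock).
    static std::set<int> propagateDead(const CFG &Succs, int Root) {
      std::set<int> Dead;
      std::vector<int> Work(1, Root);
      while (!Work.empty()) {
        int D = Work.back();
        Work.pop_back();
        if (!Dead.insert(D).second)
          continue;
        for (int S : Succs[D]) {
          bool AllPredDead = true;
          for (int B = 0; B != (int)Succs.size(); ++B)
            for (int T : Succs[B])
              if (T == S && !Dead.count(B))
                AllPredDead = false;
          if (AllPredDead)
            Work.push_back(S);  // S just lost its last live predecessor
        }
      }
      return Dead;
    }

    int main() {
      // 0->{1,2}, 1->{3}, 2->{3}: killing 1 leaves 3 alive (2 still reaches
      // it), but killing 0 kills all four blocks.
      CFG G = {{1, 2}, {3}, {3}, {}};
      std::printf("dead from 1: %zu\n", propagateDead(G, 1).size());  // 1
      std::printf("dead from 0: %zu\n", propagateDead(G, 0).size());  // 4
      return 0;
    }
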
diff --git a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
index 4796eb2..954e545 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -72,15 +72,13 @@ using namespace llvm;
static cl::opt<bool>
EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
- cl::desc("Enable global merge pass on constants"),
- cl::init(false));
+ cl::desc("Enable global merge pass on constants"),
+ cl::init(false));
STATISTIC(NumMerged , "Number of globals merged");
namespace {
class GlobalMerge : public FunctionPass {
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// target type sizes.
- const TargetLowering *TLI;
+ const TargetMachine *TM;
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
@@ -104,8 +102,8 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
- explicit GlobalMerge(const TargetLowering *tli = 0)
- : FunctionPass(ID), TLI(tli) {
+ explicit GlobalMerge(const TargetMachine *TM = 0)
+ : FunctionPass(ID), TM(TM) {
initializeGlobalMergePass(*PassRegistry::getPassRegistry());
}
@@ -144,6 +142,7 @@ INITIALIZE_PASS(GlobalMerge, "global-merge",
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
+ const TargetLowering *TLI = TM->getTargetLowering();
const DataLayout *TD = TLI->getDataLayout();
// FIXME: Infer the maximum possible offset depending on the actual users
@@ -234,6 +233,7 @@ void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
bool GlobalMerge::doInitialization(Module &M) {
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
BSSGlobals;
+ const TargetLowering *TLI = TM->getTargetLowering();
const DataLayout *TD = TLI->getDataLayout();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
@@ -305,6 +305,6 @@ bool GlobalMerge::doFinalization(Module &M) {
return false;
}
-Pass *llvm::createGlobalMergePass(const TargetLowering *tli) {
- return new GlobalMerge(tli);
+Pass *llvm::createGlobalMergePass(const TargetMachine *TM) {
+ return new GlobalMerge(TM);
}
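
The GlobalMerge change is worth a note: instead of caching the
TargetLowering pointer at construction, the pass now keeps the
TargetMachine and calls TM->getTargetLowering() at each use, so it never
holds a derived pointer across target reconfiguration. A generic sketch of
that "store the factory, query the product per run" shape, with
hypothetical types:

    #include <cstdio>

    struct Lowering { unsigned MaxOffset; };

    struct Machine {               // stand-in for TargetMachine
      Lowering L{4095};
      const Lowering *getLowering() const { return &L; }
    };

    struct MergePass {             // mirrors the patched GlobalMerge
      const Machine *TM;
      explicit MergePass(const Machine *TM) : TM(TM) {}
      void run() const {
        const Lowering *TLI = TM->getLowering();  // fetched per run
        std::printf("max offset = %u\n", TLI->MaxOffset);
      }
    };

    int main() {
      Machine M;
      MergePass(&M).run();
      return 0;
    }
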
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 8e76c78..235aaaa 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -532,7 +532,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// and varies predictably *inside* the loop. Evaluate the value it
// contains when the loop exits, if possible.
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
- if (!SE->isLoopInvariant(ExitValue, L))
+ if (!SE->isLoopInvariant(ExitValue, L) ||
+ !isSafeToExpand(ExitValue, *SE))
continue;
// Computing the value outside of the loop brings no benefit if :
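
The extra isSafeToExpand() check above matters because an exit value's SCEV
can contain operations (e.g. a udiv) that the original loop only executed
under a guard; expanding it unconditionally outside the loop could introduce
a trap. A hypothetical source-level illustration of the hazard:

    #include <cstdio>

    // As written, the division only runs when the guard holds; any rewrite
    // that materializes a / b unconditionally after the loop would trap for
    // b == 0, which is what the isSafeToExpand() check conservatively avoids.
    static int guarded(int a, int b, int n) {
      int x = 0;
      for (int i = 0; i < n; ++i)
        if (b != 0)
          x = a / b;
      return x;
    }

    int main() {
      std::printf("%d %d\n", guarded(10, 2, 4), guarded(10, 0, 4));  // 5 0
      return 0;
    }
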
@@ -1479,8 +1480,14 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
if (IndVar->getType()->isPointerTy()
&& !IVCount->getType()->isPointerTy()) {
+ // IVOffset will be the new GEP offset that is interpreted by GEP as a
+ // signed value. IVCount on the other hand represents the loop trip count,
+ // which is an unsigned value. FindLoopCounter only allows induction
+ // variables that have a positive unit stride of one. This means we don't
+ // have to handle the case of negative offsets (yet) and just need to zero
+ // extend IVCount.
Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
- const SCEV *IVOffset = SE->getTruncateOrSignExtend(IVCount, OfsTy);
+ const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
// Expand the code for the iteration count.
assert(SE->isLoopInvariant(IVOffset, L) &&
@@ -1492,7 +1499,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
// We could handle pointer IVs other than i8*, but we need to compensate for
// gep index scaling. See canExpandBackedgeTakenCount comments.
- assert(SE->getSizeOfExpr(
+ assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
&& "unit stride pointer IV must be i8*");
@@ -1506,9 +1513,10 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
// BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
//
// Valid Cases: (1) both integers is most common; (2) both may be pointers
- // for simple memset-style loops; (3) IVInit is an integer and IVCount is a
- // pointer may occur when enable-iv-rewrite generates a canonical IV on top
- // of case #2.
+ // for simple memset-style loops.
+ //
+ // IVInit integer and IVCount pointer would only occur if a canonical IV
+ // were generated on top of case #2, which is not expected.
const SCEV *IVLimit = 0;
// For unit stride, IVCount = Start + BECount with 2's complement overflow.
@@ -1552,44 +1560,23 @@ LinearFunctionTestReplace(Loop *L,
SCEVExpander &Rewriter) {
assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
- // LFTR can ignore IV overflow and truncate to the width of
- // BECount. This avoids materializing the add(zext(add)) expression.
- Type *CntTy = BackedgeTakenCount->getType();
-
+ // Initialize CmpIndVar and IVCount to their preincremented values.
+ Value *CmpIndVar = IndVar;
const SCEV *IVCount = BackedgeTakenCount;
// If the exiting block is the same as the backedge block, we prefer to
// compare against the post-incremented value, otherwise we must compare
// against the preincremented value.
- Value *CmpIndVar;
if (L->getExitingBlock() == L->getLoopLatch()) {
// Add one to the "backedge-taken" count to get the trip count.
- // If this addition may overflow, we have to be more pessimistic and
- // cast the induction variable before doing the add.
- const SCEV *N =
- SE->getAddExpr(IVCount, SE->getConstant(IVCount->getType(), 1));
- if (CntTy == IVCount->getType())
- IVCount = N;
- else {
- const SCEV *Zero = SE->getConstant(IVCount->getType(), 0);
- if ((isa<SCEVConstant>(N) && !N->isZero()) ||
- SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
- // No overflow. Cast the sum.
- IVCount = SE->getTruncateOrZeroExtend(N, CntTy);
- } else {
- // Potential overflow. Cast before doing the add.
- IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
- IVCount = SE->getAddExpr(IVCount, SE->getConstant(CntTy, 1));
- }
- }
+ // This addition may overflow, which is valid as long as the comparison is
+ // truncated to BackedgeTakenCount->getType().
+ IVCount = SE->getAddExpr(BackedgeTakenCount,
+ SE->getConstant(BackedgeTakenCount->getType(), 1));
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
- } else {
- // We must use the preincremented value...
- IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
- CmpIndVar = IndVar;
}
Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
@@ -1612,12 +1599,40 @@ LinearFunctionTestReplace(Loop *L,
<< " IVCount:\t" << *IVCount << "\n");
IRBuilder<> Builder(BI);
- if (SE->getTypeSizeInBits(CmpIndVar->getType())
- > SE->getTypeSizeInBits(ExitCnt->getType())) {
- CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
- "lftr.wideiv");
- }
+ // LFTR can ignore IV overflow and truncate to the width of
+ // BECount. This avoids materializing the add(zext(add)) expression.
+ unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
+ unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
+ if (CmpIndVarSize > ExitCntSize) {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
+ const SCEV *ARStart = AR->getStart();
+ const SCEV *ARStep = AR->getStepRecurrence(*SE);
+ // For constant IVCount, avoid truncation.
+ if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
+ const APInt &Start = cast<SCEVConstant>(ARStart)->getValue()->getValue();
+ APInt Count = cast<SCEVConstant>(IVCount)->getValue()->getValue();
+ // Note that the post-inc value of BackedgeTakenCount may have overflowed
+ // above such that IVCount is now zero.
+ if (IVCount != BackedgeTakenCount && Count == 0) {
+ Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
+ ++Count;
+ }
+ else
+ Count = Count.zext(CmpIndVarSize);
+ APInt NewLimit;
+ if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
+ NewLimit = Start - Count;
+ else
+ NewLimit = Start + Count;
+ ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
+
+ DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
+ } else {
+ CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
+ "lftr.wideiv");
+ }
+ }
Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
Value *OrigCond = BI->getCondition();
// It's tempting to use replaceAllUsesWith here to fully replace the old
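
The rewritten tail of LinearFunctionTestReplace widens a constant trip count
instead of truncating the IV: NewLimit = Start +/- zext(Count), with a
special case when adding one to the backedge-taken count wrapped Count to
zero in the narrow type. A worked numeric sketch of that arithmetic with
plain integers and hypothetical values (the real code uses APInt):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // 8-bit count type: backedge-taken count 255, so the trip count wraps.
      uint8_t BackedgeTaken = 255;
      uint8_t Count8 = (uint8_t)(BackedgeTaken + 1);  // == 0 (wrapped)

      // Widen to the 32-bit IV width. If the post-inc count wrapped to
      // zero, the true count is 2^8 -- mirroring the code path
      // "Count = APInt::getMaxValue(...).zext(...); ++Count;".
      uint32_t Count32 = (Count8 == 0) ? (uint32_t)UINT8_MAX + 1
                                       : (uint32_t)Count8;

      uint32_t Start = 100;  // constant AddRec start
      int StepSign = +1;     // positive unit stride
      uint32_t NewLimit = (StepSign < 0) ? Start - Count32 : Start + Count32;

      // The exit test then compares the wide IV against 356 directly,
      // avoiding an in-loop truncation of the IV.
      std::printf("count = %u, limit = %u\n", Count32, NewLimit);
      return 0;
    }
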
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index b61c5ba..b3ec2fc 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
@@ -129,6 +130,7 @@ namespace {
bool ProcessBranchOnXOR(BinaryOperator *BO);
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
+ bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
};
}
@@ -775,7 +777,11 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
return true;
}
}
+
}
+
+ if (CondBr && CondConst && TryToUnfoldSelect(CondCmp, BB))
+ return true;
}
// Check for some cases that are worth simplifying. Right now we want to look
@@ -821,7 +827,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
return false;
}
-
/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
/// load instruction, eliminate it by replacing it with a PHI node. This is an
/// important optimization that encourages jump threading, and needs to be run
@@ -836,6 +841,12 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (LoadBB->getSinglePredecessor())
return false;
+  // If the load is defined in a landing pad, it can't be partially redundant,
+  // because no instructions can be inserted on the edges between the invoke
+  // and the landing pad.
+ if (LoadBB->isLandingPad())
+ return false;
+
Value *LoadedPtr = LI->getOperand(0);
// If the loaded operand is defined in the LoadBB, it can't be available.
@@ -1615,4 +1626,80 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
return true;
}
+/// TryToUnfoldSelect - Look for blocks of the form
+/// bb1:
+/// %a = select
+/// br bb
+///
+/// bb2:
+/// %p = phi [%a, %bb] ...
+/// %c = icmp %p
+/// br i1 %c
+///
+/// And expand the select into a branch structure if one of its arms allows %c
+/// to be folded. This later enables threading from bb1 over bb2.
+bool JumpThreading::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
+ BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+ PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
+ Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
+
+ if (!CondBr || !CondBr->isConditional() || !CondLHS ||
+ CondLHS->getParent() != BB)
+ return false;
+
+ for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
+ BasicBlock *Pred = CondLHS->getIncomingBlock(I);
+ SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
+    // See if one of the incoming values is a select in the corresponding
+    // predecessor.
+ if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
+ continue;
+
+ BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (!PredTerm || !PredTerm->isUnconditional())
+ continue;
+
+    // Now check if one of the select values would allow us to constant fold
+    // the terminator in BB. We don't do the transform if both sides fold;
+    // those cases will be threaded in any case.
+ LazyValueInfo::Tristate LHSFolds =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
+ CondRHS, Pred, BB);
+ LazyValueInfo::Tristate RHSFolds =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
+ CondRHS, Pred, BB);
+ if ((LHSFolds != LazyValueInfo::Unknown ||
+ RHSFolds != LazyValueInfo::Unknown) &&
+ LHSFolds != RHSFolds) {
+ // Expand the select.
+ //
+ // Pred --
+ // | v
+ // | NewBB
+ // | |
+ // |-----
+ // v
+ // BB
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
+ BB->getParent(), BB);
+ // Move the unconditional branch to NewBB.
+ PredTerm->removeFromParent();
+ NewBB->getInstList().insert(NewBB->end(), PredTerm);
+ // Create a conditional branch and update PHI nodes.
+ BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
+ CondLHS->setIncomingValue(I, SI->getFalseValue());
+ CondLHS->addIncoming(SI->getTrueValue(), NewBB);
+ // The select is now dead.
+ SI->eraseFromParent();
+
+ // Update any other PHI nodes in BB.
+ for (BasicBlock::iterator BI = BB->begin();
+ PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
+ if (Phi != CondLHS)
+ Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
+ return true;
+ }
+ }
+ return false;
+}
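
At the source level, TryToUnfoldSelect's rewrite corresponds to replacing a
select whose value feeds a PHI and compare with explicit control flow, so
that one arm's path folds and becomes threadable. A hypothetical
illustration (the two functions are equivalent; only the CFG shape differs):

    #include <cstdio>

    // Before: the select in bb1 flows through a PHI into an icmp in bb2;
    // neither arm is visible to bb2's branch.
    static int before(bool c, int v) {
      int a = c ? 7 : v;  // select
      if (a == 7)         // icmp + conditional branch
        return 1;
      return 0;
    }

    // After unfolding: the select becomes a branch into a new block
    // ("select.unfold"); on the c path the compare folds to true and the
    // whole path can be threaded past the test.
    static int after(bool c, int v) {
      if (c)
        return 1;         // threaded: a == 7 is known here
      if (v == 7)
        return 1;
      return 0;
    }

    int main() {
      for (int c = 0; c <= 1; ++c)
        for (int v = 6; v <= 8; ++v)
          if (before(c, v) != after(c, v))
            return 1;
      std::printf("before/after agree\n");
      return 0;
    }
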
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index 0b62050..9e39d2e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -51,8 +51,8 @@ namespace {
}
private:
- bool isLoopDead(Loop *L, SmallVector<BasicBlock*, 4> &exitingBlocks,
- SmallVector<BasicBlock*, 4> &exitBlocks,
+ bool isLoopDead(Loop *L, SmallVectorImpl<BasicBlock *> &exitingBlocks,
+ SmallVectorImpl<BasicBlock *> &exitBlocks,
bool &Changed, BasicBlock *Preheader);
};
@@ -77,8 +77,8 @@ Pass *llvm::createLoopDeletionPass() {
/// checked for unique exit and exiting blocks, and that the code is in LCSSA
/// form.
bool LoopDeletion::isLoopDead(Loop *L,
- SmallVector<BasicBlock*, 4> &exitingBlocks,
- SmallVector<BasicBlock*, 4> &exitBlocks,
+ SmallVectorImpl<BasicBlock *> &exitingBlocks,
+ SmallVectorImpl<BasicBlock *> &exitBlocks,
bool &Changed, BasicBlock *Preheader) {
BasicBlock *exitBlock = exitBlocks[0];
@@ -209,7 +209,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
// Move all of the block's children to be children of the preheader, which
// allows us to remove the domtree entry for the block.
ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
- for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
+ for (SmallVectorImpl<DomTreeNode *>::iterator DI = ChildNodes.begin(),
DE = ChildNodes.end(); DI != DE; ++DI) {
DT.changeImmediateDominator(*DI, DT[preheader]);
}
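
The LoopDeletion hunks are a pure interface cleanup: taking
SmallVectorImpl<T>& instead of SmallVector<T, N>& size-erases the inline
capacity, so callers may pick any N without changing the callee's signature.
A minimal sketch of the idiom (builds against LLVM's ADT headers;
SmallVectorImpl is the real base class of SmallVector):

    #include "llvm/ADT/SmallVector.h"
    #include <cstdio>

    // The callee no longer bakes the inline element count into its type.
    static void collect(llvm::SmallVectorImpl<int> &Out) {
      for (int i = 0; i != 3; ++i)
        Out.push_back(i);
    }

    int main() {
      llvm::SmallVector<int, 4> A;   // different inline sizes,
      llvm::SmallVector<int, 64> B;  // same callee
      collect(A);
      collect(B);
      std::printf("%zu %zu\n", A.size(), B.size());  // 3 3
      return 0;
    }
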
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 8258719..952b76b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -81,7 +81,7 @@ namespace {
/// Return the condition of the branch terminating the given basic block.
static Value *getBrCondtion(BasicBlock *);
- /// Derive the precondition block (i.e the block that guards the loop
+ /// Derive the precondition block (i.e the block that guards the loop
/// preheader) from the given preheader.
static BasicBlock *getPrecondBb(BasicBlock *PreHead);
};
@@ -111,7 +111,7 @@ namespace {
/// beween a variable and zero, and if the variable is non-zero, the
/// control yeilds to the loop entry. If the branch matches the behavior,
/// the variable involved in the comparion is returned. This function will
- /// be called to see if the precondition and postcondition of the loop
+ /// be called to see if the precondition and postcondition of the loop
/// are in desirable form.
Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const;
@@ -274,11 +274,11 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
//
//===----------------------------------------------------------------------===//
-// This fucntion will return true iff the given block contains nothing but goto.
-// A typical usage of this function is to check if the preheader fucntion is
-// "almost" empty such that generated intrinsic function can be moved across
-// preheader and to be placed at the end of the preconditiona block without
-// concerning of breaking data dependence.
+// This function will return true iff the given block contains nothing but goto.
+// A typical usage of this function is to check if the preheader function is
+// "almost" empty such that generated intrinsic functions can be moved across
+// the preheader and be placed at the end of the precondition block without
+// the concern of breaking data dependence.
bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
if (BranchInst *Br = getBranch(BB)) {
return Br->isUnconditional() && BB->size() == 1;
@@ -314,7 +314,7 @@ bool NclPopcountRecognize::preliminaryScreen() {
if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
return false;
- // Counting population are usually conducted by few arithmetic instrutions.
+  // Counting population is usually conducted by a few arithmetic instructions.
// Such instructions can be easilly "absorbed" by vacant slots in a
// non-compact loop. Therefore, recognizing popcount idiom only makes sense
// in a compact loop.
@@ -339,7 +339,7 @@ bool NclPopcountRecognize::preliminaryScreen() {
PreCondBB = LIRUtil::getPrecondBb(PreHead);
if (!PreCondBB)
return false;
-
+
return true;
}
@@ -504,7 +504,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst,
// Assuming before transformation, the loop is following:
// if (x) // the precondition
// do { cnt++; x &= x - 1; } while(x);
-
+
// Step 1: Insert the ctpop instruction at the end of the precondition block
IRBuilderTy Builder(PreCondBr);
Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
@@ -611,7 +611,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst,
SE->forgetLoop(CurLoop);
}
-CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder,
+CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder,
Value *Val, DebugLoc DL) {
Value *Ops[] = { Val };
Type *Tys[] = { Val->getType() };
@@ -667,13 +667,13 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
if (!getDataLayout())
return false;
- // set DT
+ // set DT
(void)getDominatorTree();
LoopInfo &LI = getAnalysis<LoopInfo>();
TLI = &getAnalysis<TargetLibraryInfo>();
- // set TLI
+ // set TLI
(void)getTargetLibraryInfo();
SmallVector<BasicBlock*, 8> ExitBlocks;
@@ -953,6 +953,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = 0;
+ unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
+
// If we're allowed to form a memset, and the stored value would be acceptable
// for memset, use it.
if (SplatValue && TLI->has(LibFunc::memset) &&
@@ -961,8 +963,10 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
CurLoop->isLoopInvariant(SplatValue)) {
// Keep and use SplatValue.
PatternValue = 0;
- } else if (TLI->has(LibFunc::memset_pattern16) &&
+ } else if (DestAS == 0 &&
+ TLI->has(LibFunc::memset_pattern16) &&
(PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
+    // Don't create memset_pattern16 calls for non-default address spaces.
// It looks like we can use PatternValue!
SplatValue = 0;
} else {
@@ -978,20 +982,20 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, "loop-idiom");
+ Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
+
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
// or write to the aliased location. Check for any overlap by generating the
// base pointer and checking the region.
- unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
Value *BasePtr =
- Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
+ Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy,
Preheader->getTerminator());
-
if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
CurLoop, BECount,
- StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
+ StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
deleteIfDeadInstruction(BasePtr, *SE, TLI);
@@ -1002,27 +1006,35 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
+ Type *IntPtr = Builder.getIntPtrTy(TD, DestAS);
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
SCEV::FlagNUW);
- if (StoreSize != 1)
+ if (StoreSize != 1) {
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
SCEV::FlagNUW);
+ }
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
CallInst *NewCall;
- if (SplatValue)
- NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
- else {
+ if (SplatValue) {
+ NewCall = Builder.CreateMemSet(BasePtr,
+ SplatValue,
+ NumBytes,
+ StoreAlignment);
+ } else {
+    // Everything is emitted in the default address space.
+ Type *Int8PtrTy = DestInt8PtrTy;
+
Module *M = TheStore->getParent()->getParent()->getParent();
Value *MSP = M->getOrInsertFunction("memset_pattern16",
Builder.getVoidTy(),
- Builder.getInt8PtrTy(),
- Builder.getInt8PtrTy(), IntPtr,
+ Int8PtrTy,
+ Int8PtrTy,
+ IntPtr,
(void*)0);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
@@ -1032,7 +1044,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
PatternValue, ".memset_pattern");
GV->setUnnamedAddr(true); // Ok to merge these.
GV->setAlignment(16);
- Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
+ Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
}
@@ -1108,17 +1120,17 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = TD->getIntPtrType(SI->getContext());
- BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+ Type *IntPtrTy = Builder.getIntPtrTy(TD, SI->getPointerAddressSpace());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
- const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1),
SCEV::FlagNUW);
if (StoreSize != 1)
- NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
SCEV::FlagNUW);
Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+ Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
CallInst *NewCall =
Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
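
processLoopStridedStore sizes the replacement call from the backedge-taken
count: NumBytes = (BECount + 1) * StoreSize, computed in the pointer-width
integer type of the destination's address space (the new
Builder.getIntPtrTy(TD, DestAS) above). A plain-C++ sketch of the idiom
being recognized and the call it justifies, with hypothetical sizes:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      enum { N = 100 };
      static uint32_t Buf[N];

      // The loop idiom: a strided store of a splattable (bytewise) value.
      for (unsigned i = 0; i != N; ++i)
        Buf[i] = 0;

      // What the pass emits instead: N-1 backedge iterations, so
      // NumBytes = (BECount + 1) * StoreSize.
      uint64_t BECount = N - 1;
      uint64_t StoreSize = sizeof(uint32_t);
      uint64_t NumBytes = (BECount + 1) * StoreSize;
      std::memset(Buf, 0, NumBytes);

      std::printf("memset of %llu bytes\n", (unsigned long long)NumBytes);
      return 0;
    }
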
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
new file mode 100644
index 0000000..335af81
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -0,0 +1,1184 @@
+//===-- LoopReroll.cpp - Loop rerolling pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a simple loop reroller.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-reroll"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+using namespace llvm;
+
+STATISTIC(NumRerolledLoops, "Number of rerolled loops");
+
+static cl::opt<unsigned>
+MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
+ cl::desc("The maximum increment for loop rerolling"));
+
+// This loop re-rolling transformation aims to transform loops like this:
+//
+// int foo(int a);
+// void bar(int *x) {
+// for (int i = 0; i < 500; i += 3) {
+// foo(i);
+// foo(i+1);
+// foo(i+2);
+// }
+// }
+//
+// into a loop like this:
+//
+// void bar(int *x) {
+// for (int i = 0; i < 500; ++i)
+// foo(i);
+// }
+//
+// It does this by looking for loops that, besides the latch code, are composed
+// of isomorphic DAGs of instructions, with each DAG rooted at some increment
+// to the induction variable, and where each DAG is isomorphic to the DAG
+// rooted at the induction variable (excepting the sub-DAGs which root the
+// other induction-variable increments). In other words, we're looking for loop
+// bodies of the form:
+//
+// %iv = phi [ (preheader, ...), (body, %iv.next) ]
+// f(%iv)
+// %iv.1 = add %iv, 1 <-- a root increment
+// f(%iv.1)
+// %iv.2 = add %iv, 2 <-- a root increment
+// f(%iv.2)
+// %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
+// f(%iv.scale_m_1)
+// ...
+// %iv.next = add %iv, scale
+// %cmp = icmp(%iv, ...)
+// br %cmp, header, exit
+//
+// where each f(i) is a set of instructions that, collectively, are a function
+// only of i (and other loop-invariant values).
+//
+// As a special case, we can also reroll loops like this:
+//
+// int foo(int);
+// void bar(int *x) {
+// for (int i = 0; i < 500; ++i) {
+// x[3*i] = foo(0);
+// x[3*i+1] = foo(0);
+// x[3*i+2] = foo(0);
+// }
+// }
+//
+// into this:
+//
+// void bar(int *x) {
+// for (int i = 0; i < 1500; ++i)
+// x[i] = foo(0);
+// }
+//
+// in which case, we're looking for inputs like this:
+//
+// %iv = phi [ (preheader, ...), (body, %iv.next) ]
+// %scaled.iv = mul %iv, scale
+// f(%scaled.iv)
+// %scaled.iv.1 = add %scaled.iv, 1
+// f(%scaled.iv.1)
+// %scaled.iv.2 = add %scaled.iv, 2
+// f(%scaled.iv.2)
+// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
+// f(%scaled.iv.scale_m_1)
+// ...
+// %iv.next = add %iv, 1
+// %cmp = icmp(%iv, ...)
+// br %cmp, header, exit
+
+namespace {
+ class LoopReroll : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopReroll() : LoopPass(ID) {
+ initializeLoopRerollPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<TargetLibraryInfo>();
+ }
+
+protected:
+ AliasAnalysis *AA;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ TargetLibraryInfo *TLI;
+ DominatorTree *DT;
+
+ typedef SmallVector<Instruction *, 16> SmallInstructionVector;
+ typedef SmallSet<Instruction *, 16> SmallInstructionSet;
+
+  // A chain of isomorphic instructions, identified by a single-use PHI,
+ // representing a reduction. Only the last value may be used outside the
+ // loop.
+ struct SimpleLoopReduction {
+ SimpleLoopReduction(Instruction *P, Loop *L)
+ : Valid(false), Instructions(1, P) {
+ assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
+ add(L);
+ }
+
+ bool valid() const {
+ return Valid;
+ }
+
+ Instruction *getPHI() const {
+ assert(Valid && "Using invalid reduction");
+ return Instructions.front();
+ }
+
+ Instruction *getReducedValue() const {
+ assert(Valid && "Using invalid reduction");
+ return Instructions.back();
+ }
+
+ Instruction *get(size_t i) const {
+ assert(Valid && "Using invalid reduction");
+ return Instructions[i+1];
+ }
+
+ Instruction *operator [] (size_t i) const { return get(i); }
+
+ // The size, ignoring the initial PHI.
+ size_t size() const {
+ assert(Valid && "Using invalid reduction");
+ return Instructions.size()-1;
+ }
+
+ typedef SmallInstructionVector::iterator iterator;
+ typedef SmallInstructionVector::const_iterator const_iterator;
+
+ iterator begin() {
+ assert(Valid && "Using invalid reduction");
+ return llvm::next(Instructions.begin());
+ }
+
+ const_iterator begin() const {
+ assert(Valid && "Using invalid reduction");
+ return llvm::next(Instructions.begin());
+ }
+
+ iterator end() { return Instructions.end(); }
+ const_iterator end() const { return Instructions.end(); }
+
+ protected:
+ bool Valid;
+ SmallInstructionVector Instructions;
+
+ void add(Loop *L);
+ };
+
+ // The set of all reductions, and state tracking of possible reductions
+ // during loop instruction processing.
+ struct ReductionTracker {
+ typedef SmallVector<SimpleLoopReduction, 16> SmallReductionVector;
+
+ // Add a new possible reduction.
+ void addSLR(SimpleLoopReduction &SLR) {
+ PossibleReds.push_back(SLR);
+ }
+
+    // Set up tracking of possible reductions corresponding to the provided
+    // rerolling scale. Only reductions with a number of non-PHI instructions
+    // divisible by the scale are considered. Three instruction sets are
+    // filled in:
+    //   - A set of all possible instructions in eligible reductions.
+    //   - A set of all PHIs in eligible reductions.
+    //   - A set of all reduced values (last instructions) in eligible
+    //     reductions.
+ void restrictToScale(uint64_t Scale,
+ SmallInstructionSet &PossibleRedSet,
+ SmallInstructionSet &PossibleRedPHISet,
+ SmallInstructionSet &PossibleRedLastSet) {
+ PossibleRedIdx.clear();
+ PossibleRedIter.clear();
+ Reds.clear();
+
+ for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
+ if (PossibleReds[i].size() % Scale == 0) {
+ PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
+ PossibleRedPHISet.insert(PossibleReds[i].getPHI());
+
+ PossibleRedSet.insert(PossibleReds[i].getPHI());
+ PossibleRedIdx[PossibleReds[i].getPHI()] = i;
+ for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(),
+ JE = PossibleReds[i].end(); J != JE; ++J) {
+ PossibleRedSet.insert(*J);
+ PossibleRedIdx[*J] = i;
+ }
+ }
+ }
+
+ // The functions below are used while processing the loop instructions.
+
+ // Are the two instructions both from reductions, and furthermore, from
+ // the same reduction?
+ bool isPairInSame(Instruction *J1, Instruction *J2) {
+ DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
+ if (J1I != PossibleRedIdx.end()) {
+ DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
+ if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
+ return true;
+ }
+
+ return false;
+ }
+
+ // The two provided instructions, the first from the base iteration, and
+ // the second from iteration i, form a matched pair. If these are part of
+ // a reduction, record that fact.
+ void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
+ if (PossibleRedIdx.count(J1)) {
+ assert(PossibleRedIdx.count(J2) &&
+ "Recording reduction vs. non-reduction instruction?");
+
+ PossibleRedIter[J1] = 0;
+ PossibleRedIter[J2] = i;
+
+ int Idx = PossibleRedIdx[J1];
+ assert(Idx == PossibleRedIdx[J2] &&
+ "Recording pair from different reductions?");
+ Reds.insert(Idx);
+ }
+ }
+
+ // The functions below can be called after we've finished processing all
+ // instructions in the loop, and we know which reductions were selected.
+
+ // Is the provided instruction the PHI of a reduction selected for
+ // rerolling?
+ bool isSelectedPHI(Instruction *J) {
+ if (!isa<PHINode>(J))
+ return false;
+
+ for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
+ RI != RIE; ++RI) {
+ int i = *RI;
+ if (cast<Instruction>(J) == PossibleReds[i].getPHI())
+ return true;
+ }
+
+ return false;
+ }
+
+ bool validateSelected();
+ void replaceSelected();
+
+ protected:
+ // The vector of all possible reductions (for any scale).
+ SmallReductionVector PossibleReds;
+
+ DenseMap<Instruction *, int> PossibleRedIdx;
+ DenseMap<Instruction *, int> PossibleRedIter;
+ DenseSet<int> Reds;
+ };
+
+ void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
+ void collectPossibleReductions(Loop *L,
+ ReductionTracker &Reductions);
+ void collectInLoopUserSet(Loop *L,
+ const SmallInstructionVector &Roots,
+ const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users);
+ void collectInLoopUserSet(Loop *L,
+ Instruction * Root,
+ const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users);
+ bool findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
+ Instruction *&IV,
+ SmallInstructionVector &LoopIncs);
+ bool collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale, Instruction *IV,
+ SmallVector<SmallInstructionVector, 32> &Roots,
+ SmallInstructionSet &AllRoots,
+ SmallInstructionVector &LoopIncs);
+ bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
+ ReductionTracker &Reductions);
+ };
+}
+
+char LoopReroll::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
+
+Pass *llvm::createLoopRerollPass() {
+ return new LoopReroll;
+}
+
+// Returns true if the provided instruction is used outside the given loop.
+// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
+// non-loop blocks to be outside the loop.
+static bool hasUsesOutsideLoop(Instruction *I, Loop *L) {
+ for (Value::use_iterator UI = I->use_begin(),
+ UIE = I->use_end(); UI != UIE; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (!L->contains(User))
+ return true;
+ }
+
+ return false;
+}
+
+// Collect the list of loop induction variables with respect to which it might
+// be possible to reroll the loop.
+void LoopReroll::collectPossibleIVs(Loop *L,
+ SmallInstructionVector &PossibleIVs) {
+ BasicBlock *Header = L->getHeader();
+ for (BasicBlock::iterator I = Header->begin(),
+ IE = Header->getFirstInsertionPt(); I != IE; ++I) {
+ if (!isa<PHINode>(I))
+ continue;
+ if (!I->getType()->isIntegerTy())
+ continue;
+
+ if (const SCEVAddRecExpr *PHISCEV =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(I))) {
+ if (PHISCEV->getLoop() != L)
+ continue;
+ if (!PHISCEV->isAffine())
+ continue;
+ if (const SCEVConstant *IncSCEV =
+ dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE))) {
+ if (!IncSCEV->getValue()->getValue().isStrictlyPositive())
+ continue;
+ if (IncSCEV->getValue()->uge(MaxInc))
+ continue;
+
+ DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " <<
+ *PHISCEV << "\n");
+ PossibleIVs.push_back(I);
+ }
+ }
+ }
+}
+
+// Add the remainder of the reduction-variable chain to the instruction vector
+// (the initial PHINode has already been added). If successful, the object is
+// marked as valid.
+void LoopReroll::SimpleLoopReduction::add(Loop *L) {
+ assert(!Valid && "Cannot add to an already-valid chain");
+
+ // The reduction variable must be a chain of single-use instructions
+ // (including the PHI), except for the last value (which is used by the PHI
+ // and also outside the loop).
+ Instruction *C = Instructions.front();
+
+ do {
+ C = cast<Instruction>(*C->use_begin());
+ if (C->hasOneUse()) {
+ if (!C->isBinaryOp())
+ return;
+
+ if (!(isa<PHINode>(Instructions.back()) ||
+ C->isSameOperationAs(Instructions.back())))
+ return;
+
+ Instructions.push_back(C);
+ }
+ } while (C->hasOneUse());
+
+ if (Instructions.size() < 2 ||
+ !C->isSameOperationAs(Instructions.back()) ||
+ C->use_begin() == C->use_end())
+ return;
+
+ // C is now the (potential) last instruction in the reduction chain.
+ for (Value::use_iterator UI = C->use_begin(), UIE = C->use_end();
+ UI != UIE; ++UI) {
+ // The only in-loop user can be the initial PHI.
+ if (L->contains(cast<Instruction>(*UI)))
+      if (cast<Instruction>(*UI) != Instructions.front())
+ return;
+ }
+
+ Instructions.push_back(C);
+ Valid = true;
+}
+
+// Collect the vector of possible reduction variables.
+void LoopReroll::collectPossibleReductions(Loop *L,
+ ReductionTracker &Reductions) {
+ BasicBlock *Header = L->getHeader();
+ for (BasicBlock::iterator I = Header->begin(),
+ IE = Header->getFirstInsertionPt(); I != IE; ++I) {
+ if (!isa<PHINode>(I))
+ continue;
+ if (!I->getType()->isSingleValueType())
+ continue;
+
+ SimpleLoopReduction SLR(I, L);
+ if (!SLR.valid())
+ continue;
+
+ DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with " <<
+ SLR.size() << " chained instructions)\n");
+ Reductions.addSLR(SLR);
+ }
+}
+
+// Collect the set of all users of the provided root instruction. This set of
+// users contains not only the direct users of the root instruction, but also
+// all users of those users, and so on. There are two exceptions:
+//
+// 1. Instructions in the set of excluded instructions are never added to the
+// use set (even if they are users). This is used, for example, to avoid
+// including root increments in the use set of the primary IV.
+//
+// 2. Instructions in the set of final instructions are added to the use set
+// if they are users, but their users are not added. This is used, for
+// example, to prevent a reduction update from forcing all later reduction
+// updates into the use set.
+void LoopReroll::collectInLoopUserSet(Loop *L,
+ Instruction *Root, const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users) {
+ SmallInstructionVector Queue(1, Root);
+ while (!Queue.empty()) {
+ Instruction *I = Queue.pop_back_val();
+ if (!Users.insert(I).second)
+ continue;
+
+ if (!Final.count(I))
+ for (Value::use_iterator UI = I->use_begin(),
+ UIE = I->use_end(); UI != UIE; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ // Ignore "wrap-around" uses to PHIs of this loop's header.
+ if (PN->getIncomingBlock(UI) == L->getHeader())
+ continue;
+ }
+
+ if (L->contains(User) && !Exclude.count(User)) {
+ Queue.push_back(User);
+ }
+ }
+
+ // We also want to collect single-user "feeder" values.
+ for (User::op_iterator OI = I->op_begin(),
+ OIE = I->op_end(); OI != OIE; ++OI) {
+ if (Instruction *Op = dyn_cast<Instruction>(*OI))
+ if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
+ !Final.count(Op))
+ Queue.push_back(Op);
+ }
+ }
+}
+
+// Collect all of the users of all of the provided root instructions (combined
+// into a single set).
+void LoopReroll::collectInLoopUserSet(Loop *L,
+ const SmallInstructionVector &Roots,
+ const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users) {
+ for (SmallInstructionVector::const_iterator I = Roots.begin(),
+ IE = Roots.end(); I != IE; ++I)
+ collectInLoopUserSet(L, *I, Exclude, Final, Users);
+}
+
+static bool isSimpleLoadStore(Instruction *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->isSimple();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isSimple();
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
+ return !MI->isVolatile();
+ return false;
+}
+
+// Recognize loops that are setup like this:
+//
+// %iv = phi [ (preheader, ...), (body, %iv.next) ]
+// %scaled.iv = mul %iv, scale
+// f(%scaled.iv)
+// %scaled.iv.1 = add %scaled.iv, 1
+// f(%scaled.iv.1)
+// %scaled.iv.2 = add %scaled.iv, 2
+// f(%scaled.iv.2)
+// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
+// f(%scaled.iv.scale_m_1)
+// ...
+// %iv.next = add %iv, 1
+// %cmp = icmp(%iv, ...)
+// br %cmp, header, exit
+//
+// and, if found, set IV = %scaled.iv, and add %iv.next to LoopIncs.
+bool LoopReroll::findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
+ Instruction *&IV,
+ SmallInstructionVector &LoopIncs) {
+ // This is a special case: here we're looking for all uses (except for
+ // the increment) to be multiplied by a common factor. The increment must
+ // be by one. This is to capture loops like:
+ // for (int i = 0; i < 500; ++i) {
+ // foo(3*i); foo(3*i+1); foo(3*i+2);
+ // }
+ if (RealIV->getNumUses() != 2)
+ return false;
+ const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(RealIV));
+ Instruction *User1 = cast<Instruction>(*RealIV->use_begin()),
+ *User2 = cast<Instruction>(*llvm::next(RealIV->use_begin()));
+ if (!SE->isSCEVable(User1->getType()) || !SE->isSCEVable(User2->getType()))
+ return false;
+ const SCEVAddRecExpr *User1SCEV =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User1)),
+ *User2SCEV =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User2));
+ if (!User1SCEV || !User1SCEV->isAffine() ||
+ !User2SCEV || !User2SCEV->isAffine())
+ return false;
+
+ // We assume below that User1 is the scale multiply and User2 is the
+ // increment. If this can't be true, then swap them.
+ if (User1SCEV == RealIVSCEV->getPostIncExpr(*SE)) {
+ std::swap(User1, User2);
+ std::swap(User1SCEV, User2SCEV);
+ }
+
+ if (User2SCEV != RealIVSCEV->getPostIncExpr(*SE))
+ return false;
+ assert(User2SCEV->getStepRecurrence(*SE)->isOne() &&
+ "Invalid non-unit step for multiplicative scaling");
+ LoopIncs.push_back(User2);
+
+ if (const SCEVConstant *MulScale =
+ dyn_cast<SCEVConstant>(User1SCEV->getStepRecurrence(*SE))) {
+ // Make sure that both the start and step have the same multiplier.
+ if (RealIVSCEV->getStart()->getType() != MulScale->getType())
+ return false;
+ if (SE->getMulExpr(RealIVSCEV->getStart(), MulScale) !=
+ User1SCEV->getStart())
+ return false;
+
+ ConstantInt *MulScaleCI = MulScale->getValue();
+ if (!MulScaleCI->uge(2) || MulScaleCI->uge(MaxInc))
+ return false;
+ Scale = MulScaleCI->getZExtValue();
+ IV = User1;
+ } else
+ return false;
+
+ DEBUG(dbgs() << "LRR: Found possible scaling " << *User1 << "\n");
+ return true;
+}
+
+// Collect all root increments with respect to the provided induction variable
+// (normally the PHI, but sometimes a multiply). A root increment is an
+// instruction, normally an add, with a positive constant less than Scale. In a
+// rerollable loop, each of these increments is the root of an instruction
+// graph isomorphic to the others. Also, we collect the final induction
+// increment (the increment equal to the Scale), and its users in LoopIncs.
+bool LoopReroll::collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale,
+ Instruction *IV,
+ SmallVector<SmallInstructionVector, 32> &Roots,
+ SmallInstructionSet &AllRoots,
+ SmallInstructionVector &LoopIncs) {
+ for (Value::use_iterator UI = IV->use_begin(),
+ UIE = IV->use_end(); UI != UIE; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (!SE->isSCEVable(User->getType()))
+ continue;
+ if (User->getType() != IV->getType())
+ continue;
+ if (!L->contains(User))
+ continue;
+ if (hasUsesOutsideLoop(User, L))
+ continue;
+
+ if (const SCEVConstant *Diff = dyn_cast<SCEVConstant>(SE->getMinusSCEV(
+ SE->getSCEV(User), SE->getSCEV(IV)))) {
+ uint64_t Idx = Diff->getValue()->getValue().getZExtValue();
+ if (Idx > 0 && Idx < Scale) {
+ Roots[Idx-1].push_back(User);
+ AllRoots.insert(User);
+ } else if (Idx == Scale && Inc > 1) {
+ LoopIncs.push_back(User);
+ }
+ }
+ }
+
+ if (Roots[0].empty())
+ return false;
+ bool AllSame = true;
+ for (unsigned i = 1; i < Scale-1; ++i)
+ if (Roots[i].size() != Roots[0].size()) {
+ AllSame = false;
+ break;
+ }
+
+ if (!AllSame)
+ return false;
+
+ return true;
+}
+
+// Validate the selected reductions. All iterations must have an isomorphic
+// part of the reduction chain and, for non-associative reductions, the chain
+// entries must appear in order.
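+//
+// For example (a sketch, treating floating-point addition as
+// non-associative): a chain whose entries were assigned to iterations
+// 0, 1, 0, 1 is rejected, because after visiting an entry from iteration 1
+// we may not return to iteration 0.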
+bool LoopReroll::ReductionTracker::validateSelected() {
+ // For a non-associative reduction, the chain entries must appear in order.
+ for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
+ RI != RIE; ++RI) {
+ int i = *RI;
+ int PrevIter = 0, BaseCount = 0, Count = 0;
+ for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(),
+ JE = PossibleReds[i].end(); J != JE; ++J) {
+ // Note that all instructions in the chain must have been found because
+ // all instructions in the function must have been assigned to some
+ // iteration.
+ int Iter = PossibleRedIter[*J];
+ if (Iter != PrevIter && Iter != PrevIter + 1 &&
+ !PossibleReds[i].getReducedValue()->isAssociative()) {
+ DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: " <<
+ *J << "\n");
+ return false;
+ }
+
+ if (Iter != PrevIter) {
+ if (Count != BaseCount) {
+ DEBUG(dbgs() << "LRR: Iteration " << PrevIter <<
+ " reduction use count " << Count <<
+ " is not equal to the base use count " <<
+ BaseCount << "\n");
+ return false;
+ }
+
+ Count = 0;
+ }
+
+ ++Count;
+ if (Iter == 0)
+ ++BaseCount;
+
+ PrevIter = Iter;
+ }
+ }
+
+ return true;
+}
+
+// For all selected reductions, remove all parts except those in the first
+// iteration (and the PHI). Replace outside uses of the reduced value with uses
+// of the first-iteration reduced value (in other words, reroll the selected
+// reductions).
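+//
+// For example (sketch): if the chain is x0 -> x1 -> x2 -> x3 and only x0 and
+// x1 belong to iteration 0, then x2 and x3 are removed and any remaining use
+// of x3 (the reduced value) is redirected to x1.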
+void LoopReroll::ReductionTracker::replaceSelected() {
+ // Fixup reductions to refer to the last instruction associated with the
+ // first iteration (not the last).
+ for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
+ RI != RIE; ++RI) {
+ int i = *RI;
+ int j = 0;
+ for (int e = PossibleReds[i].size(); j != e; ++j)
+ if (PossibleRedIter[PossibleReds[i][j]] != 0) {
+ --j;
+ break;
+ }
+
+ // Replace users with the new end-of-chain value.
+ SmallInstructionVector Users;
+ for (Value::use_iterator UI =
+ PossibleReds[i].getReducedValue()->use_begin(),
+ UIE = PossibleReds[i].getReducedValue()->use_end(); UI != UIE; ++UI)
+ Users.push_back(cast<Instruction>(*UI));
+
+ for (SmallInstructionVector::iterator J = Users.begin(),
+ JE = Users.end(); J != JE; ++J)
+ (*J)->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
+ PossibleReds[i][j]);
+ }
+}
+
+// Reroll the provided loop with respect to the provided induction variable.
+// Generally, we're looking for a loop like this:
+//
+// %iv = phi [ (preheader, ...), (body, %iv.next) ]
+// f(%iv)
+// %iv.1 = add %iv, 1 <-- a root increment
+// f(%iv.1)
+// %iv.2 = add %iv, 2 <-- a root increment
+// f(%iv.2)
+// %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
+// f(%iv.scale_m_1)
+// ...
+// %iv.next = add %iv, scale
+// %cmp = icmp(%iv, ...)
+// br %cmp, header, exit
+//
+// Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
+// instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
+// be intermixed with each other. The restriction imposed by this algorithm is
+// that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
+// etc. be the same.
+//
+// First, we collect the use set of %iv, excluding the other increment roots.
+// This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
+// times, collecting the use set of f(%iv.(i+1)) on each pass, during which we:
+// - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
+// the next unmatched instruction in f(%iv.(i+1)).
+// - Ensure that both matched instructions don't have any external users
+// (with the exception of last-in-chain reduction instructions).
+// - Track the (aliasing) write set, and other side effects, of all
+// instructions that belong to future iterations that come before the matched
+// instructions. If the matched instructions read from that write set, then
+// f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
+// f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
+// if any of these future instructions have side effects (cannot be
+// speculatively executed), and so do the matched instructions, then we
+// cannot reorder those side-effect-producing instructions, and rerolling
+// fails.
+//
+// Finally, we make sure that all loop instructions are either loop increment
+// roots, simple latch code, parts of validated reductions, part of f(%iv),
+// or part of some f(%iv.i). If all of that is true (and all reductions
+// have been validated), then we reroll the loop.
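+//
+// Informally (a sketch, not drawn from the source), with a scale of 3 this
+// rewrites:
+//   for (int i = 0; i < 1500; i += 3) { g(i); g(i+1); g(i+2); }
+// into:
+//   for (int i = 0; i < 1500; ++i) g(i);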
+bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
+ const SCEV *IterCount,
+ ReductionTracker &Reductions) {
+ const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
+ uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
+ getValue()->getZExtValue();
+ // The collection of loop increment instructions.
+ SmallInstructionVector LoopIncs;
+ uint64_t Scale = Inc;
+
+ // The effective induction variable, IV, is normally also the real induction
+ // variable. When we're dealing with a loop like:
+ // for (int i = 0; i < 500; ++i)
+ // x[3*i] = ...;
+ // x[3*i+1] = ...;
+ // x[3*i+2] = ...;
+ // then the real IV is still i, but the effective IV is (3*i).
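+ // A concrete reading (sketch): in the loop above Inc == 1, so
+ // findScaleFromMul fires and sets Scale = 3 with IV pointing at the
+ // multiply; in a loop stepping by 3 directly, Scale simply stays Inc == 3.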
+ Instruction *RealIV = IV;
+ if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs))
+ return false;
+
+ assert(Scale <= MaxInc && "Scale is too large");
+ assert(Scale > 1 && "Scale must be at least 2");
+
+ // The set of increment instructions for each increment value.
+ SmallVector<SmallInstructionVector, 32> Roots(Scale-1);
+ SmallInstructionSet AllRoots;
+ if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs))
+ return false;
+
+ DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
+ *RealIV << "\n");
+
+ // An array of just the possible reductions for this scale factor. When we
+ // collect the set of all users of some root instructions, these reduction
+ // instructions are treated as 'final' (their uses are not considered).
+ // This is important because we don't want the root use set to search down
+ // the reduction chain.
+ SmallInstructionSet PossibleRedSet;
+ SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet;
+ Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet,
+ PossibleRedLastSet);
+
+ // We now need to check for equivalence of the use graph of each root with
+ // that of the primary induction variable (excluding the roots). Our goal
+ // here is not to solve the full graph isomorphism problem, but rather to
+ // catch common cases without a lot of work. As a result, we will assume
+ // that the relative order of the instructions in each unrolled iteration
+ // is the same (although we will not make an assumption about how the
+ // different iterations are intermixed). Note that while the order must be
+ // the same, the instructions may not be in the same basic block.
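+ // For example (sketch): if f(%iv) is the ordered stream (load; add;
+ // store), then f(%iv.1) must also be (load; add; store) in that order;
+ // the two streams may interleave, but neither may be internally permuted.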
+ SmallInstructionSet Exclude(AllRoots);
+ Exclude.insert(LoopIncs.begin(), LoopIncs.end());
+
+ DenseSet<Instruction *> BaseUseSet;
+ collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet);
+
+ DenseSet<Instruction *> AllRootUses;
+ std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1);
+
+ bool MatchFailed = false;
+ for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) {
+ DenseSet<Instruction *> &RootUseSet = RootUseSets[i];
+ collectInLoopUserSet(L, Roots[i], SmallInstructionSet(),
+ PossibleRedSet, RootUseSet);
+
+ DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() <<
+ " vs. iteration increment " << (i+1) <<
+ " use set size: " << RootUseSet.size() << "\n");
+
+ if (BaseUseSet.size() != RootUseSet.size()) {
+ MatchFailed = true;
+ break;
+ }
+
+ // In addition to regular aliasing information, we need to look for
+ // instructions from later (future) iterations that have side effects
+ // preventing us from reordering them past other instructions with side
+ // effects.
+ bool FutureSideEffects = false;
+ AliasSetTracker AST(*AA);
+
+ // The map between instructions in f(%iv.(i+1)) and f(%iv).
+ DenseMap<Value *, Value *> BaseMap;
+
+ assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops");
+ for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(),
+ JE = Header->end(); J1 != JE && !MatchFailed; ++J1) {
+ if (cast<Instruction>(J1) == RealIV)
+ continue;
+ if (cast<Instruction>(J1) == IV)
+ continue;
+ if (!BaseUseSet.count(J1))
+ continue;
+ if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs.
+ continue;
+
+ while (J2 != JE && (!RootUseSet.count(J2) ||
+ std::find(Roots[i].begin(), Roots[i].end(), J2) !=
+ Roots[i].end())) {
+ // As we iterate through the instructions, instructions that don't
+ // belong to previous iterations (or the base case) must belong to
+ // future iterations. We want to track the alias set of writes from
+ // previous iterations.
+ if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) &&
+ !AllRootUses.count(J2)) {
+ if (J2->mayWriteToMemory())
+ AST.add(J2);
+
+ // Note: This is specifically guarded by a check on isa<PHINode>,
+ // which, while a valid (somewhat arbitrary) micro-optimization, is
+ // needed because otherwise isSafeToSpeculativelyExecute returns
+ // false on PHI nodes.
+ if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL))
+ FutureSideEffects = true;
+ }
+
+ ++J2;
+ }
+
+ if (!J1->isSameOperationAs(J2)) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << "\n");
+ MatchFailed = true;
+ break;
+ }
+
+ // Make sure that this instruction, which is in the use set of this
+ // root instruction, does not also belong to the base set or the set of
+ // some previous root instruction.
+ if (BaseUseSet.count(J2) || AllRootUses.count(J2)) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << " (prev. case overlap)\n");
+ MatchFailed = true;
+ break;
+ }
+
+ // Make sure that we don't alias with any instruction in the alias set
+ // tracker. If we do, then we depend on a future iteration, and we
+ // can't reroll.
+ if (J2->mayReadFromMemory()) {
+ for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end();
+ K != KE && !MatchFailed; ++K) {
+ if (K->aliasesUnknownInst(J2, *AA)) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << " (depends on future store)\n");
+ MatchFailed = true;
+ break;
+ }
+ }
+ }
+
+ // If we've passed an instruction from a future iteration that may have
+ // side effects, and this instruction might also, then we can't reorder
+ // them, and this matching fails. As an exception, we allow the alias
+ // set tracker to handle regular (simple) load/store dependencies.
+ if (FutureSideEffects &&
+ ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1, DL)) ||
+ (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL)))) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 <<
+ " (side effects prevent reordering)\n");
+ MatchFailed = true;
+ break;
+ }
+
+ // For instructions that are part of a reduction, if the operation is
+ // associative, then don't bother matching the operands (because we
+ // already know that the instructions are isomorphic, and the order
+ // within the iteration does not matter). For non-associative reductions,
+ // we do need to match the operands, because we need to reject
+ // out-of-order instructions within an iteration!
+ // For example (assume floating-point addition), we need to reject this:
+ // x += a[i]; x += b[i];
+ // x += a[i+1]; x += b[i+1];
+ // x += b[i+2]; x += a[i+2];
+ bool InReduction = Reductions.isPairInSame(J1, J2);
+
+ if (!(InReduction && J1->isAssociative())) {
+ bool Swapped = false, SomeOpMatched = false;
+ for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
+ Value *Op2 = J2->getOperand(j);
+
+ // If this is part of a reduction (and the operation is not
+ // associative), then we match all operands, but not those that are
+ // part of the reduction.
+ if (InReduction)
+ if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
+ if (Reductions.isPairInSame(J2, Op2I))
+ continue;
+
+ DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
+ if (BMI != BaseMap.end())
+ Op2 = BMI->second;
+ else if (std::find(Roots[i].begin(), Roots[i].end(),
+ (Instruction*) Op2) != Roots[i].end())
+ Op2 = IV;
+
+ if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
+ // If we've not already decided to swap the matched operands, and
+ // we've not already matched our first operand (note that we could
+ // have skipped matching the first operand because it is part of a
+ // reduction above), and the instruction is commutative, then try
+ // the swapped match.
+ if (!Swapped && J1->isCommutative() && !SomeOpMatched &&
+ J1->getOperand(!j) == Op2) {
+ Swapped = true;
+ } else {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << " (operand " << j << ")\n");
+ MatchFailed = true;
+ break;
+ }
+ }
+
+ SomeOpMatched = true;
+ }
+ }
+
+ if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) ||
+ (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << " (uses outside loop)\n");
+ MatchFailed = true;
+ break;
+ }
+
+ if (!MatchFailed)
+ BaseMap.insert(std::pair<Value *, Value *>(J2, J1));
+
+ AllRootUses.insert(J2);
+ Reductions.recordPair(J1, J2, i+1);
+
+ ++J2;
+ }
+ }
+
+ if (MatchFailed)
+ return false;
+
+ DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
+ *RealIV << "\n");
+
+ DenseSet<Instruction *> LoopIncUseSet;
+ collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(),
+ SmallInstructionSet(), LoopIncUseSet);
+ DEBUG(dbgs() << "LRR: Loop increment set size: " <<
+ LoopIncUseSet.size() << "\n");
+
+ // Make sure that all instructions in the loop have been included in some
+ // use set.
+ for (BasicBlock::iterator J = Header->begin(), JE = Header->end();
+ J != JE; ++J) {
+ if (isa<DbgInfoIntrinsic>(J))
+ continue;
+ if (cast<Instruction>(J) == RealIV)
+ continue;
+ if (cast<Instruction>(J) == IV)
+ continue;
+ if (BaseUseSet.count(J) || AllRootUses.count(J) ||
+ (LoopIncUseSet.count(J) && (J->isTerminator() ||
+ isSafeToSpeculativelyExecute(J, DL))))
+ continue;
+
+ if (AllRoots.count(J))
+ continue;
+
+ if (Reductions.isSelectedPHI(J))
+ continue;
+
+ DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV <<
+ " unprocessed instruction found: " << *J << "\n");
+ MatchFailed = true;
+ break;
+ }
+
+ if (MatchFailed)
+ return false;
+
+ DEBUG(dbgs() << "LRR: all instructions processed from " <<
+ *RealIV << "\n");
+
+ if (!Reductions.validateSelected())
+ return false;
+
+ // At this point, we've validated the rerolling, and we're committed to
+ // making changes!
+
+ Reductions.replaceSelected();
+
+ // Remove instructions associated with non-base iterations.
+ for (BasicBlock::reverse_iterator J = Header->rbegin();
+ J != Header->rend();) {
+ if (AllRootUses.count(&*J)) {
+ Instruction *D = &*J;
+ DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
+ D->eraseFromParent();
+ continue;
+ }
+
+ ++J;
+ }
+
+ // Insert the new induction variable.
+ const SCEV *Start = RealIVSCEV->getStart();
+ if (Inc == 1)
+ Start = SE->getMulExpr(Start,
+ SE->getConstant(Start->getType(), Scale));
+ const SCEVAddRecExpr *H =
+ cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start,
+ SE->getConstant(RealIVSCEV->getType(), 1),
+ L, SCEV::FlagAnyWrap));
+ { // Limit the lifetime of SCEVExpander.
+ SCEVExpander Expander(*SE, "reroll");
+ PHINode *NewIV =
+ cast<PHINode>(Expander.expandCodeFor(H, IV->getType(),
+ Header->begin()));
+ for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
+ JE = BaseUseSet.end(); J != JE; ++J)
+ (*J)->replaceUsesOfWith(IV, NewIV);
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
+ if (LoopIncUseSet.count(BI)) {
+ const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
+ if (Inc == 1)
+ ICSCEV =
+ SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
+ Value *IC;
+ if (isa<SCEVConstant>(ICSCEV)) {
+ IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), BI);
+ } else {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ Preheader = InsertPreheaderForLoop(L, this);
+
+ IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(),
+ Preheader->getTerminator());
+ }
+
+ Value *NewIVNext = NewIV->getIncomingValueForBlock(Header);
+ Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIVNext, IC,
+ "exitcond");
+ BI->setCondition(Cond);
+
+ if (BI->getSuccessor(1) != Header)
+ BI->swapSuccessors();
+ }
+ }
+ }
+
+ SimplifyInstructionsInBlock(Header, DL, TLI);
+ DeleteDeadPHIs(Header, TLI);
+ ++NumRerolledLoops;
+ return true;
+}
+
+bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+ AA = &getAnalysis<AliasAnalysis>();
+ LI = &getAnalysis<LoopInfo>();
+ SE = &getAnalysis<ScalarEvolution>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+ DL = getAnalysisIfAvailable<DataLayout>();
+ DT = &getAnalysis<DominatorTree>();
+
+ BasicBlock *Header = L->getHeader();
+ DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() <<
+ "] Loop %" << Header->getName() << " (" <<
+ L->getNumBlocks() << " block(s))\n");
+
+ bool Changed = false;
+
+ // For now, we'll handle only single BB loops.
+ if (L->getNumBlocks() > 1)
+ return Changed;
+
+ if (!SE->hasLoopInvariantBackedgeTakenCount(L))
+ return Changed;
+
+ const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
+ const SCEV *IterCount =
+ SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->getType(), 1));
+ DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
+
+ // First, we need to find the induction variable with respect to which we can
+ // reroll (there may be several possible options).
+ SmallInstructionVector PossibleIVs;
+ collectPossibleIVs(L, PossibleIVs);
+
+ if (PossibleIVs.empty()) {
+ DEBUG(dbgs() << "LRR: No possible IVs found\n");
+ return Changed;
+ }
+
+ ReductionTracker Reductions;
+ collectPossibleReductions(L, Reductions);
+
+ // For each possible IV, collect the associated possible set of 'root' nodes
+ // (i+1, i+2, etc.).
+ for (SmallInstructionVector::iterator I = PossibleIVs.begin(),
+ IE = PossibleIVs.end(); I != IE; ++I)
+ if (reroll(*I, L, Header, IterCount, Reductions)) {
+ Changed = true;
+ break;
+ }
+
+ return Changed;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 73e44d7..eff5268 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -774,6 +774,16 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
}
namespace {
+class LSRUse;
+}
+// Check if it is legal to fold 2 base registers.
+static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
+ const Formula &F);
+// Get the cost of the scaling factor used in F for LU.
+static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F);
+
+namespace {
/// Cost - This class is used to measure and compare candidate formulae.
class Cost {
@@ -785,11 +795,12 @@ class Cost {
unsigned NumBaseAdds;
unsigned ImmCost;
unsigned SetupCost;
+ unsigned ScaleCost;
public:
Cost()
: NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
- SetupCost(0) {}
+ SetupCost(0), ScaleCost(0) {}
bool operator<(const Cost &Other) const;
@@ -799,9 +810,9 @@ public:
// Once any of the metrics loses, they must all remain losers.
bool isValid() {
return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
- | ImmCost | SetupCost) != ~0u)
+ | ImmCost | SetupCost | ScaleCost) != ~0u)
|| ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
- & ImmCost & SetupCost) == ~0u);
+ & ImmCost & SetupCost & ScaleCost) == ~0u);
}
#endif
@@ -810,12 +821,14 @@ public:
return NumRegs == ~0u;
}
- void RateFormula(const Formula &F,
+ void RateFormula(const TargetTransformInfo &TTI,
+ const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
+ const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
void print(raw_ostream &OS) const;
@@ -900,12 +913,14 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
}
}
-void Cost::RateFormula(const Formula &F,
+void Cost::RateFormula(const TargetTransformInfo &TTI,
+ const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
+ const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
@@ -932,7 +947,12 @@ void Cost::RateFormula(const Formula &F,
// Determine how many (unfolded) adds we'll need inside the loop.
size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
if (NumBaseParts > 1)
- NumBaseAdds += NumBaseParts - 1;
+ // Do not count the base and a possible second register if the target
+ // allows folding 2 registers.
+ NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F));
+
+ // Accumulate non-free scaling amounts.
+ ScaleCost += getScalingFactorCost(TTI, LU, F);
// Tally up the non-zero immediates.
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
@@ -955,6 +975,7 @@ void Cost::Loose() {
NumBaseAdds = ~0u;
ImmCost = ~0u;
SetupCost = ~0u;
+ ScaleCost = ~0u;
}
/// operator< - Choose the lower cost.
@@ -967,6 +988,8 @@ bool Cost::operator<(const Cost &Other) const {
return NumIVMuls < Other.NumIVMuls;
if (NumBaseAdds != Other.NumBaseAdds)
return NumBaseAdds < Other.NumBaseAdds;
+ if (ScaleCost != Other.ScaleCost)
+ return ScaleCost < Other.ScaleCost;
if (ImmCost != Other.ImmCost)
return ImmCost < Other.ImmCost;
if (SetupCost != Other.SetupCost)
@@ -983,6 +1006,8 @@ void Cost::print(raw_ostream &OS) const {
if (NumBaseAdds != 0)
OS << ", plus " << NumBaseAdds << " base add"
<< (NumBaseAdds == 1 ? "" : "s");
+ if (ScaleCost != 0)
+ OS << ", plus " << ScaleCost << " scale cost";
if (ImmCost != 0)
OS << ", plus " << ImmCost << " imm cost";
if (SetupCost != 0)
@@ -1145,6 +1170,13 @@ public:
/// may be used.
bool AllFixupsOutsideLoop;
+ /// RigidFormula is set to true to guarantee that this use will be associated
+ /// with a single formula--the one that initially matched. Some SCEV
+ /// expressions cannot be expanded. This allows LSR to consider the registers
+ /// used by those expressions without the need to expand them later after
+ /// changing the formula.
+ bool RigidFormula;
+
/// WidestFixupType - This records the widest use type for any fixup using
/// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
/// max fixup widths to be equivalent, because the narrower one may be relying
@@ -1163,6 +1195,7 @@ public:
MinOffset(INT64_MAX),
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
+ RigidFormula(false),
WidestFixupType(0) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
@@ -1189,6 +1222,9 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
+ if (!Formulae.empty() && RigidFormula)
+ return false;
+
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
@@ -1359,6 +1395,66 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
+static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
+ const Formula &F) {
+ // If F is used as an Addressing Mode, it may fold one Base plus one
+ // scaled register. If the scaled register is nil, behave as if another
+ // element of the base regs were a 1-scaled register.
+ // This is possible if BaseRegs has at least 2 registers.
+
+ // If this is not an address calculation, this is not an addressing mode
+ // use.
+ if (LU.Kind != LSRUse::Address)
+ return false;
+
+ // F is already scaled.
+ if (F.Scale != 0)
+ return false;
+
+ // We need to keep one register for the base and one to scale.
+ if (F.BaseRegs.size() < 2)
+ return false;
+
+ return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ F.BaseGV, F.BaseOffset, F.HasBaseReg, 1);
+}
+
+static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F) {
+ if (!F.Scale)
+ return 0;
+ assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, F) && "Illegal formula in use.");
+
+ switch (LU.Kind) {
+ case LSRUse::Address: {
+ // Check the scaling factor cost with both the min and max offsets.
+ int ScaleCostMinOffset =
+ TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
+ F.BaseOffset + LU.MinOffset,
+ F.HasBaseReg, F.Scale);
+ int ScaleCostMaxOffset =
+ TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
+ F.BaseOffset + LU.MaxOffset,
+ F.HasBaseReg, F.Scale);
+
+ assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
+ "Legal addressing mode has an illegal cost!");
+ return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
+ }
+ case LSRUse::ICmpZero:
+ // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
+ // Therefore, return 0 in case F.Scale == -1.
+ return F.Scale != -1;
+
+ case LSRUse::Basic:
+ case LSRUse::Special:
+ return 0;
+ }
+
+ llvm_unreachable("Invalid LSRUse Kind!");
+}
+
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, Type *AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
@@ -1664,7 +1760,7 @@ void LSRInstance::OptimizeShadowIV() {
IVUsers::const_iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
- Type *DestTy = NULL;
+ Type *DestTy = 0;
bool IsSigned = false;
/* If shadow use is a int->float cast then insert a second IV
@@ -1726,7 +1822,7 @@ void LSRInstance::OptimizeShadowIV() {
continue;
/* Initialize new IV, double d = 0.0 in above example. */
- ConstantInt *C = NULL;
+ ConstantInt *C = 0;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
@@ -2858,7 +2954,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
@@ -2901,6 +2997,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
/// and loop-computable portions.
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
+ // Mark uses whose expressions cannot be expanded.
+ if (!isSafeToExpand(S, SE))
+ LU.RigidFormula = true;
+
Formula F;
F.InitialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
@@ -3048,7 +3148,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
if (Remainder)
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
}
- return NULL;
+ return 0;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (AR->getStart()->isZero())
@@ -3060,7 +3160,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
// does not pertain to this loop.
if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
- Remainder = NULL;
+ Remainder = 0;
}
if (Remainder != AR->getStart()) {
if (!Remainder)
@@ -3082,7 +3182,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
if (Remainder)
Ops.push_back(SE.getMulExpr(C, Remainder));
- return NULL;
+ return 0;
}
}
return S;
@@ -3607,7 +3707,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
abs64(NewF.BaseOffset)) &&
(C->getValue()->getValue() +
NewF.BaseOffset).countTrailingZeros() >=
- CountTrailingZeros_64(NewF.BaseOffset))
+ countTrailingZeros<uint64_t>(NewF.BaseOffset))
goto skip_formula;
// Ok, looks good.
@@ -3690,7 +3790,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
// the corresponding bad register from the Regs set.
Cost CostF;
Regs.clear();
- CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT,
+ CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, LU,
&LoserRegs);
if (CostF.isLoser()) {
// During initial formula generation, undesirable formulae are generated
@@ -3726,7 +3826,8 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
Cost CostBest;
Regs.clear();
- CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, LU.Offsets, SE,
+ DT, LU);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
@@ -4079,7 +4180,8 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
// the current best, prune the search at that point.
NewCost = CurCost;
NewRegs = CurRegs;
- NewCost.RateFormula(F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT);
+ NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT,
+ LU);
if (NewCost < SolutionCost) {
Workspace.push_back(&F);
if (Workspace.size() != Uses.size()) {
@@ -4266,6 +4368,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const {
const LSRUse &LU = Uses[LF.LUIdx];
+ if (LU.RigidFormula)
+ return LF.OperandValToReplace;
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 80d060b..08ac38d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -49,12 +49,17 @@ namespace {
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) {
+ LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
+ CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R;
UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
+ UserAllowPartial = (P != -1) ||
+ (UnrollAllowPartial.getNumOccurrences() > 0);
+ UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0);
+ UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0);
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -75,7 +80,11 @@ namespace {
unsigned CurrentCount;
unsigned CurrentThreshold;
bool CurrentAllowPartial;
+ bool CurrentRuntime;
+ bool UserCount; // CurrentCount is user-specified.
bool UserThreshold; // CurrentThreshold is user-specified.
+ bool UserAllowPartial; // CurrentAllowPartial is user-specified.
+ bool UserRuntime; // CurrentRuntime is user-specified.
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -110,8 +119,9 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
- return new LoopUnroll(Threshold, Count, AllowPartial);
+Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
+ int Runtime) {
+ return new LoopUnroll(Threshold, Count, AllowPartial, Runtime);
}
/// ApproximateLoopSize - Approximate the size of the loop.
@@ -145,16 +155,24 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
<< "] Loop %" << Header->getName() << "\n");
(void)Header;
+ TargetTransformInfo::UnrollingPreferences UP;
+ UP.Threshold = CurrentThreshold;
+ UP.OptSizeThreshold = OptSizeUnrollThreshold;
+ UP.Count = CurrentCount;
+ UP.Partial = CurrentAllowPartial;
+ UP.Runtime = CurrentRuntime;
+ TTI.getUnrollingPreferences(L, UP);
+
// Determine the current unrolling threshold. While this is normally set
// from UnrollThreshold, it is overridden to a smaller value if the current
// function is marked as optimize-for-size, and the unroll threshold was
// not user specified.
- unsigned Threshold = CurrentThreshold;
+ unsigned Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
if (!UserThreshold &&
Header->getParent()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize))
- Threshold = OptSizeUnrollThreshold;
+ Threshold = UP.OptSizeThreshold;
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
@@ -167,11 +185,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
}
+
+ bool Runtime = UserRuntime ? CurrentRuntime : UP.Runtime;
+
// Use a default unroll-count if the user doesn't specify a value
// and the trip count is a run-time value. The default is different
// for run-time or compile-time trip count loops.
- unsigned Count = CurrentCount;
- if (UnrollRuntime && CurrentCount == 0 && TripCount == 0)
+ unsigned Count = UserCount ? CurrentCount : UP.Count;
+ if (Runtime && Count == 0 && TripCount == 0)
Count = UnrollRuntimeCount;
if (Count == 0) {
@@ -204,7 +225,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (TripCount != 1 && Size > Threshold) {
DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
<< " because size: " << Size << ">" << Threshold << "\n");
- if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) {
+ bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
+ if (!AllowPartial && !(Runtime && TripCount == 0)) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
@@ -215,7 +237,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
while (Count != 0 && TripCount%Count != 0)
Count--;
}
- else if (UnrollRuntime) {
+ else if (Runtime) {
// Reduce unroll count to be a lower power-of-two value
while (Count != 0 && Size > Threshold) {
Count >>= 1;
@@ -231,7 +253,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, &LPM))
return false;
return true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 0e8199f..c4ebfd5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -87,8 +87,8 @@ namespace {
typedef LoopPropsMap::iterator LoopPropsMapIt;
LoopPropsMap LoopsProperties;
- UnswitchedValsMap* CurLoopInstructions;
- LoopProperties* CurrentLoopProperties;
+ UnswitchedValsMap *CurLoopInstructions;
+ LoopProperties *CurrentLoopProperties;
// Max size of code we can produce on remained iterations.
unsigned MaxSize;
@@ -96,30 +96,30 @@ namespace {
public:
LUAnalysisCache() :
- CurLoopInstructions(NULL), CurrentLoopProperties(NULL),
+ CurLoopInstructions(0), CurrentLoopProperties(0),
MaxSize(Threshold)
{}
// Analyze loop. Check its size, calculate is it possible to unswitch
// it. Returns true if we can unswitch this loop.
- bool countLoop(const Loop* L, const TargetTransformInfo &TTI);
+ bool countLoop(const Loop *L, const TargetTransformInfo &TTI);
// Clean all data related to given loop.
- void forgetLoop(const Loop* L);
+ void forgetLoop(const Loop *L);
// Mark case value as unswitched.
// Since SI instruction can be partly unswitched, in order to avoid
// extra unswitching in cloned loops keep track all unswitched values.
- void setUnswitched(const SwitchInst* SI, const Value* V);
+ void setUnswitched(const SwitchInst *SI, const Value *V);
// Check was this case value unswitched before or not.
- bool isUnswitched(const SwitchInst* SI, const Value* V);
+ bool isUnswitched(const SwitchInst *SI, const Value *V);
// Clone all loop-unswitch related loop properties.
// Redistribute unswitching quotas.
// Note, that new loop data is stored inside the VMap.
- void cloneData(const Loop* NewLoop, const Loop* OldLoop,
- const ValueToValueMapTy& VMap);
+ void cloneData(const Loop *NewLoop, const Loop *OldLoop,
+ const ValueToValueMapTy &VMap);
};
class LoopUnswitch : public LoopPass {
@@ -151,8 +151,8 @@ namespace {
static char ID; // Pass ID, replacement for typeid
explicit LoopUnswitch(bool Os = false) :
LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
- currentLoop(NULL), DT(NULL), loopHeader(NULL),
- loopPreheader(NULL) {
+ currentLoop(0), DT(0), loopHeader(0),
+ loopPreheader(0) {
initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
}
@@ -196,7 +196,7 @@ namespace {
/// Split all of the edges from inside the loop to their exit blocks.
/// Update the appropriate Phi nodes as we do so.
- void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks);
+ void SplitExitEdges(Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks);
bool UnswitchIfProfitable(Value *LoopCond, Constant *Val);
void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
@@ -212,8 +212,6 @@ namespace {
Instruction *InsertPt);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
- void RemoveBlockIfDead(BasicBlock *BB,
- std::vector<Instruction*> &Worklist, Loop *l);
void RemoveLoopFromHierarchy(Loop *L);
bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0,
BasicBlock **LoopExit = 0);
@@ -225,12 +223,14 @@ namespace {
// it. Returns true if we can unswitch this loop.
bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
- std::pair<LoopPropsMapIt, bool> InsertRes =
+ LoopPropsMapIt PropsIt;
+ bool Inserted;
+ llvm::tie(PropsIt, Inserted) =
LoopsProperties.insert(std::make_pair(L, LoopProperties()));
- LoopProperties& Props = InsertRes.first->second;
+ LoopProperties &Props = PropsIt->second;
- if (InsertRes.second) {
+ if (Inserted) {
// New loop.
// Limit the number of instructions to avoid causing significant code
@@ -242,8 +242,7 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
// consideration code simplification opportunities and code that can
// be shared by the resultant unswitched loops.
CodeMetrics Metrics;
- for (Loop::block_iterator I = L->block_begin(),
- E = L->block_end();
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
Metrics.analyzeBasicBlock(*I, TTI);
@@ -253,17 +252,16 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
if (Metrics.notDuplicatable) {
DEBUG(dbgs() << "NOT unswitching loop %"
- << L->getHeader()->getName() << ", contents cannot be "
- << "duplicated!\n");
+ << L->getHeader()->getName() << ", contents cannot be "
+ << "duplicated!\n");
return false;
}
}
if (!Props.CanBeUnswitchedCount) {
DEBUG(dbgs() << "NOT unswitching loop %"
- << L->getHeader()->getName() << ", cost too high: "
- << L->getBlocks().size() << "\n");
-
+ << L->getHeader()->getName() << ", cost too high: "
+ << L->getBlocks().size() << "\n");
return false;
}
@@ -275,41 +273,41 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
}
// Clean all data related to given loop.
-void LUAnalysisCache::forgetLoop(const Loop* L) {
+void LUAnalysisCache::forgetLoop(const Loop *L) {
LoopPropsMapIt LIt = LoopsProperties.find(L);
if (LIt != LoopsProperties.end()) {
- LoopProperties& Props = LIt->second;
+ LoopProperties &Props = LIt->second;
MaxSize += Props.CanBeUnswitchedCount * Props.SizeEstimation;
LoopsProperties.erase(LIt);
}
- CurrentLoopProperties = NULL;
- CurLoopInstructions = NULL;
+ CurrentLoopProperties = 0;
+ CurLoopInstructions = 0;
}
// Mark case value as unswitched.
// Since SI instruction can be partly unswitched, in order to avoid
// extra unswitching in cloned loops keep track all unswitched values.
-void LUAnalysisCache::setUnswitched(const SwitchInst* SI, const Value* V) {
+void LUAnalysisCache::setUnswitched(const SwitchInst *SI, const Value *V) {
(*CurLoopInstructions)[SI].insert(V);
}
// Check was this case value unswitched before or not.
-bool LUAnalysisCache::isUnswitched(const SwitchInst* SI, const Value* V) {
+bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) {
return (*CurLoopInstructions)[SI].count(V);
}
// Clone all loop-unswitch related loop properties.
// Redistribute unswitching quotas.
// Note, that new loop data is stored inside the VMap.
-void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop,
- const ValueToValueMapTy& VMap) {
+void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop,
+ const ValueToValueMapTy &VMap) {
- LoopProperties& NewLoopProps = LoopsProperties[NewLoop];
- LoopProperties& OldLoopProps = *CurrentLoopProperties;
- UnswitchedValsMap& Insts = OldLoopProps.UnswitchedVals;
+ LoopProperties &NewLoopProps = LoopsProperties[NewLoop];
+ LoopProperties &OldLoopProps = *CurrentLoopProperties;
+ UnswitchedValsMap &Insts = OldLoopProps.UnswitchedVals;
// Reallocate "can-be-unswitched quota"
@@ -324,9 +322,9 @@ void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop,
// for new loop switches we clone info about values that was
// already unswitched and has redundant successors.
for (UnswitchedValsIt I = Insts.begin(); I != Insts.end(); ++I) {
- const SwitchInst* OldInst = I->first;
- Value* NewI = VMap.lookup(OldInst);
- const SwitchInst* NewInst = cast_or_null<SwitchInst>(NewI);
+ const SwitchInst *OldInst = I->first;
+ Value *NewI = VMap.lookup(OldInst);
+ const SwitchInst *NewInst = cast_or_null<SwitchInst>(NewI);
assert(NewInst && "All instructions that are in SrcBB must be in VMap.");
NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst];
@@ -458,14 +456,14 @@ bool LoopUnswitch::processCurrentLoop() {
// Find a value to unswitch on:
// FIXME: this should chose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
- Constant *UnswitchVal = NULL;
+ Constant *UnswitchVal = 0;
// Do not process same value again and again.
// At this point we have some cases already unswitched and
// some not yet unswitched. Let's find the first not yet unswitched one.
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
- Constant* UnswitchValCandidate = i.getCaseValue();
+ Constant *UnswitchValCandidate = i.getCaseValue();
if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
UnswitchVal = UnswitchValCandidate;
break;
@@ -511,7 +509,8 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
// Already visited. Without more analysis, this could indicate an infinite
// loop.
return false;
- } else if (!L->contains(BB)) {
+ }
+ if (!L->contains(BB)) {
// Otherwise, this is a loop exit, this is fine so long as this is the
// first exit.
if (ExitBB != 0) return false;
@@ -595,11 +594,11 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
// on already unswitched cases.
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
- BasicBlock* LoopExitCandidate;
+ BasicBlock *LoopExitCandidate;
if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
i.getCaseSuccessor()))) {
// Okay, we found a trivial case, remember the value that is trivial.
- ConstantInt* CaseVal = i.getCaseValue();
+ ConstantInt *CaseVal = i.getCaseValue();
// Check that it was not unswitched before, since already unswitched
// trivial vals are looks trivial too.
@@ -752,7 +751,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
/// SplitExitEdges - Split all of the edges from inside the loop to their exit
/// blocks. Update the appropriate Phi nodes as we do so.
void LoopUnswitch::SplitExitEdges(Loop *L,
- const SmallVector<BasicBlock *, 8> &ExitBlocks){
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks){
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = ExitBlocks[i];
@@ -854,9 +853,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// If the successor of the exit block had PHI nodes, add an entry for
// NewExit.
- PHINode *PN;
- for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
- PN = cast<PHINode>(I);
+ for (BasicBlock::iterator I = ExitSucc->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
ValueToValueMapTy::iterator It = VMap.find(V);
if (It != VMap.end()) V = It->second;
@@ -864,8 +862,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
}
if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
- PN = PHINode::Create(LPad->getType(), 0, "",
- ExitSucc->getFirstInsertionPt());
+ PHINode *PN = PHINode::Create(LPad->getType(), 0, "",
+ ExitSucc->getFirstInsertionPt());
for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc);
I != E; ++I) {
@@ -946,117 +944,6 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
++NumSimplify;
}
-/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop
-/// information, and remove any dead successors it has.
-///
-void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
- std::vector<Instruction*> &Worklist,
- Loop *L) {
- if (pred_begin(BB) != pred_end(BB)) {
- // This block isn't dead, since an edge to BB was just removed, see if there
- // are any easy simplifications we can do now.
- if (BasicBlock *Pred = BB->getSinglePredecessor()) {
- // If it has one pred, fold phi nodes in BB.
- while (isa<PHINode>(BB->begin()))
- ReplaceUsesOfWith(BB->begin(),
- cast<PHINode>(BB->begin())->getIncomingValue(0),
- Worklist, L, LPM);
-
- // If this is the header of a loop and the only pred is the latch, we now
- // have an unreachable loop.
- if (Loop *L = LI->getLoopFor(BB))
- if (loopHeader == BB && L->contains(Pred)) {
- // Remove the branch from the latch to the header block, this makes
- // the header dead, which will make the latch dead (because the header
- // dominates the latch).
- LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
- Pred->getTerminator()->eraseFromParent();
- new UnreachableInst(BB->getContext(), Pred);
-
- // The loop is now broken, remove it from LI.
- RemoveLoopFromHierarchy(L);
-
- // Reprocess the header, which now IS dead.
- RemoveBlockIfDead(BB, Worklist, L);
- return;
- }
-
- // If pred ends in a uncond branch, add uncond branch to worklist so that
- // the two blocks will get merged.
- if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
- if (BI->isUnconditional())
- Worklist.push_back(BI);
- }
- return;
- }
-
- DEBUG(dbgs() << "Nuking dead block: " << *BB);
-
- // Remove the instructions in the basic block from the worklist.
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- RemoveFromWorklist(I, Worklist);
-
- // Anything that uses the instructions in this basic block should have their
- // uses replaced with undefs.
- // If I is not void type then replaceAllUsesWith undef.
- // This allows ValueHandlers and custom metadata to adjust itself.
- if (!I->getType()->isVoidTy())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- }
-
- // If this is the edge to the header block for a loop, remove the loop and
- // promote all subloops.
- if (Loop *BBLoop = LI->getLoopFor(BB)) {
- if (BBLoop->getLoopLatch() == BB) {
- RemoveLoopFromHierarchy(BBLoop);
- if (currentLoop == BBLoop) {
- currentLoop = 0;
- redoLoop = false;
- }
- }
- }
-
- // Remove the block from the loop info, which removes it from any loops it
- // was in.
- LI->removeBlock(BB);
-
-
- // Remove phi node entries in successors for this block.
- TerminatorInst *TI = BB->getTerminator();
- SmallVector<BasicBlock*, 4> Succs;
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
- Succs.push_back(TI->getSuccessor(i));
- TI->getSuccessor(i)->removePredecessor(BB);
- }
-
- // Unique the successors, remove anything with multiple uses.
- array_pod_sort(Succs.begin(), Succs.end());
- Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());
-
- // Remove the basic block, including all of the instructions contained in it.
- LPM->deleteSimpleAnalysisValue(BB, L);
- BB->eraseFromParent();
- // Remove successor blocks here that are not dead, so that we know we only
- // have dead blocks in this list. Nondead blocks have a way of becoming dead,
- // then getting removed before we revisit them, which is badness.
- //
- for (unsigned i = 0; i != Succs.size(); ++i)
- if (pred_begin(Succs[i]) != pred_end(Succs[i])) {
- // One exception is loop headers. If this block was the preheader for a
- // loop, then we DO want to visit the loop so the loop gets deleted.
- // We know that if the successor is a loop header, that this loop had to
- // be the preheader: the case where this was the latch block was handled
- // above and headers can only have two predecessors.
- if (!LI->isLoopHeader(Succs[i])) {
- Succs.erase(Succs.begin()+i);
- --i;
- }
- }
-
- for (unsigned i = 0, e = Succs.size(); i != e; ++i)
- RemoveBlockIfDead(Succs[i], Worklist, L);
-}
-
/// RemoveLoopFromHierarchy - We have discovered that the specified loop has
/// become unwrapped, either because the backedge was deleted, or because the
/// edge into the header was removed. If the edge into the header from the
@@ -1088,7 +975,6 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
std::vector<Instruction*> Worklist;
LLVMContext &Context = Val->getContext();
-
// If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
// in the loop with the appropriate one directly.
if (IsEqual || (isa<ConstantInt>(Val) &&
@@ -1108,8 +994,8 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Worklist.push_back(U);
}
- for (std::vector<Instruction*>::iterator UI = Worklist.begin();
- UI != Worklist.end(); ++UI)
+ for (std::vector<Instruction*>::iterator UI = Worklist.begin(),
+ UE = Worklist.end(); UI != UE; ++UI)
(*UI)->replaceUsesOfWith(LIC, Replacement);
SimplifyCode(Worklist, L);
@@ -1266,23 +1152,6 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
continue;
}
- if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
- // Conditional branch. Turn it into an unconditional branch, then
- // remove dead blocks.
- continue; // FIXME: Enable.
-
- DEBUG(dbgs() << "Folded branch: " << *BI);
- BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
- BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
- DeadSucc->removePredecessor(BI->getParent(), true);
- Worklist.push_back(BranchInst::Create(LiveSucc, BI));
- LPM->deleteSimpleAnalysisValue(BI, L);
- BI->eraseFromParent();
- RemoveFromWorklist(BI, Worklist);
- ++NumSimplify;
-
- RemoveBlockIfDead(DeadSucc, Worklist, L);
- }
continue;
}
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index be0f0e8..9912d3d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -170,14 +170,17 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
// pessimize the llvm optimizer.
//
// Since we don't have perfect knowledge here, make some assumptions: assume
- // the maximum GPR width is the same size as the pointer size and assume that
- // this width can be stored. If so, check to see whether we will end up
- // actually reducing the number of stores used.
+ // the maximum GPR width is the same size as the largest legal integer
+ // size. If so, check to see whether we will end up actually reducing the
+ // number of stores used.
unsigned Bytes = unsigned(End-Start);
- unsigned NumPointerStores = Bytes/TD.getPointerSize();
+ unsigned MaxIntSize = TD.getLargestLegalIntTypeSize();
+ if (MaxIntSize == 0)
+ MaxIntSize = 1;
+ unsigned NumPointerStores = Bytes / MaxIntSize;
// Assume the remaining bytes if any are done a byte at a time.
- unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
+ unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize;
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
@@ -465,7 +468,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
// Zap all the stores.
- for (SmallVector<Instruction*, 16>::const_iterator
+ for (SmallVectorImpl<Instruction *>::const_iterator
SI = Range.TheStores.begin(),
SE = Range.TheStores.end(); SI != SE; ++SI) {
MD->removeInstruction(*SI);
@@ -626,8 +629,14 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
Type *StructTy = cast<PointerType>(A->getType())->getElementType();
- uint64_t destSize = TD->getTypeAllocSize(StructTy);
+ if (!StructTy->isSized()) {
+ // The call may never return and hence the copy-instruction may never
+ // be executed, and therefore it's not safe to say "the destination
+ // has at least <cpyLen> bytes, as implied by the copy-instruction".
+ return false;
+ }
+ uint64_t destSize = TD->getTypeAllocSize(StructTy);
if (destSize < srcSize)
return false;
} else {
diff --git a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
new file mode 100644
index 0000000..15cee44
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -0,0 +1,156 @@
+//===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to partially inline the fast path of well-known library
+// functions, such as using square-root instructions for cases where sqrt()
+// does not need to set errno.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "partially-inline-libcalls"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+namespace {
+ class PartiallyInlineLibCalls : public FunctionPass {
+ public:
+ static char ID;
+
+ PartiallyInlineLibCalls() :
+ FunctionPass(ID) {
+ initializePartiallyInlineLibCallsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnFunction(Function &F);
+
+ private:
+ /// Optimize calls to sqrt.
+ bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
+ BasicBlock &CurrBB, Function::iterator &BB);
+ };
+
+ char PartiallyInlineLibCalls::ID = 0;
+}
+
+INITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls",
+ "Partially inline calls to library functions", false, false)
+
+void PartiallyInlineLibCalls::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetTransformInfo>();
+ FunctionPass::getAnalysisUsage(AU);
+}
+
+bool PartiallyInlineLibCalls::runOnFunction(Function &F) {
+ bool Changed = false;
+ Function::iterator CurrBB;
+ TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ const TargetTransformInfo *TTI = &getAnalysis<TargetTransformInfo>();
+ for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
+ CurrBB = BB++;
+
+ for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
+ II != IE; ++II) {
+ CallInst *Call = dyn_cast<CallInst>(&*II);
+ Function *CalledFunc;
+
+ if (!Call || !(CalledFunc = Call->getCalledFunction()))
+ continue;
+
+ // Skip if the function has local linkage or is not a known library
+ // function.
+ LibFunc::Func LibFunc;
+ if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() ||
+ !TLI->getLibFunc(CalledFunc->getName(), LibFunc))
+ continue;
+
+ switch (LibFunc) {
+ case LibFunc::sqrtf:
+ case LibFunc::sqrt:
+ if (TTI->haveFastSqrt(Call->getType()) &&
+ optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
+ break;
+ continue;
+ default:
+ continue;
+ }
+
+ Changed = true;
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
+ Function *CalledFunc,
+ BasicBlock &CurrBB,
+ Function::iterator &BB) {
+ // There is no need to change the IR, since the backend will emit a sqrt
+ // instruction if the call has already been marked read-only.
+ if (Call->onlyReadsMemory())
+ return false;
+
+ // Do the following transformation:
+ //
+ // (before)
+ // dst = sqrt(src)
+ //
+ // (after)
+ // v0 = sqrt_noreadmem(src) # native sqrt instruction.
+ // if (v0 is a NaN)
+ // v1 = sqrt(src) # library call.
+ // dst = phi(v0, v1)
+ //
+
+ // Move all instructions following Call to the newly created block JoinBB.
+ // Create a phi there and replace all uses.
+ BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode(), this);
+ IRBuilder<> Builder(JoinBB, JoinBB->begin());
+ PHINode *Phi = Builder.CreatePHI(Call->getType(), 2);
+ Call->replaceAllUsesWith(Phi);
+
+ // Create basic block LibCallBB and insert a call to library function sqrt.
+ BasicBlock *LibCallBB = BasicBlock::Create(CurrBB.getContext(), "call.sqrt",
+ CurrBB.getParent(), JoinBB);
+ Builder.SetInsertPoint(LibCallBB);
+ Instruction *LibCall = Call->clone();
+ Builder.Insert(LibCall);
+ Builder.CreateBr(JoinBB);
+
+ // Add the "readnone" attribute so that the backend can use a native sqrt
+ // instruction for this call. Insert an FP compare instruction and a
+ // conditional branch at the end of CurrBB.
+ Call->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
+ CurrBB.getTerminator()->eraseFromParent();
+ Builder.SetInsertPoint(&CurrBB);
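+ // Note: "fcmp oeq x, x" is an ordered comparison and holds for every value
+ // except NaN, so the branch reaches the library-call block only when the
+ // native sqrt produced a NaN (for example, for a negative input).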
+ Value *FCmp = Builder.CreateFCmpOEQ(Call, Call);
+ Builder.CreateCondBr(FCmp, JoinBB, LibCallBB);
+
+ // Add phi operands.
+ Phi->addIncoming(Call, &CurrBB);
+ Phi->addIncoming(LibCall, LibCallBB);
+
+ BB = JoinBB;
+ return true;
+}
+
+FunctionPass *llvm::createPartiallyInlineLibCallsPass() {
+ return new PartiallyInlineLibCalls();
+}
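
At the source level the rewrite corresponds roughly to the following hand-written fast path. This is a hedged sketch, not code from the pass; fast_sqrt is hypothetical and __builtin_sqrt stands in for the readnone call that the backend lowers to a native instruction:

#include <math.h>

double fast_sqrt(double src) {
  double v0 = __builtin_sqrt(src); // lowered to a hardware sqrt instruction
  if (v0 == v0)                    // ordered self-compare: false only for NaN
    return v0;                     // fast path, no errno bookkeeping
  return sqrt(src);                // slow path: the libm call may set errno
}

The pass preserves errno semantics because every input that would make sqrt() set errno also makes the native instruction return NaN, funneling exactly those cases to the library call.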
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index a3c241d..328a9c5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -122,7 +122,6 @@ namespace {
class XorOpnd {
public:
XorOpnd(Value *V);
- const XorOpnd &operator=(const XorOpnd &That);
bool isInvalid() const { return SymbolicPart == 0; }
bool isOrExpr() const { return isOr; }
@@ -225,15 +224,6 @@ XorOpnd::XorOpnd(Value *V) {
isOr = true;
}
-const XorOpnd &XorOpnd::operator=(const XorOpnd &That) {
- OrigVal = That.OrigVal;
- SymbolicPart = That.SymbolicPart;
- ConstPart = That.ConstPart;
- SymbolicRank = That.SymbolicRank;
- isOr = That.isOr;
- return *this;
-}
-
char Reassociate::ID = 0;
INITIALIZE_PASS(Reassociate, "reassociate",
"Reassociate expressions", false, false)
@@ -251,21 +241,24 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
}
static bool isUnmovableInstruction(Instruction *I) {
- if (I->getOpcode() == Instruction::PHI ||
- I->getOpcode() == Instruction::LandingPad ||
- I->getOpcode() == Instruction::Alloca ||
- I->getOpcode() == Instruction::Load ||
- I->getOpcode() == Instruction::Invoke ||
- (I->getOpcode() == Instruction::Call &&
- !isa<DbgInfoIntrinsic>(I)) ||
- I->getOpcode() == Instruction::UDiv ||
- I->getOpcode() == Instruction::SDiv ||
- I->getOpcode() == Instruction::FDiv ||
- I->getOpcode() == Instruction::URem ||
- I->getOpcode() == Instruction::SRem ||
- I->getOpcode() == Instruction::FRem)
+ switch (I->getOpcode()) {
+ case Instruction::PHI:
+ case Instruction::LandingPad:
+ case Instruction::Alloca:
+ case Instruction::Load:
+ case Instruction::Invoke:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
return true;
- return false;
+ case Instruction::Call:
+ return !isa<DbgInfoIntrinsic>(I);
+ default:
+ return false;
+ }
}
void Reassociate::BuildRankMap(Function &F) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index e30a274..4364720 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -214,7 +214,7 @@ public:
/// This returns true if the block was not considered live before.
bool MarkBlockExecutable(BasicBlock *BB) {
if (!BBExecutable.insert(BB)) return false;
- DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n");
+ DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << '\n');
BBWorkList.push_back(BB); // Add the block to the work list!
return true;
}
@@ -427,7 +427,7 @@ private:
// feasible that wasn't before. Revisit the PHI nodes in the block
// because they have potentially new operands.
DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
- << " -> " << Dest->getName() << "\n");
+ << " -> " << Dest->getName() << '\n');
PHINode *PN;
for (BasicBlock::iterator I = Dest->begin();
@@ -439,7 +439,7 @@ private:
// getFeasibleSuccessors - Return a vector of booleans to indicate which
// successors are reachable from a given terminator instruction.
//
- void getFeasibleSuccessors(TerminatorInst &TI, SmallVector<bool, 16> &Succs);
+ void getFeasibleSuccessors(TerminatorInst &TI, SmallVectorImpl<bool> &Succs);
// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
// block to the 'To' basic block is currently feasible.
@@ -501,7 +501,7 @@ private:
void visitInstruction(Instruction &I) {
// If a new instruction is added to LLVM that we don't handle.
- dbgs() << "SCCP: Don't know how to handle: " << I;
+ dbgs() << "SCCP: Don't know how to handle: " << I << '\n';
markAnythingOverdefined(&I); // Just in case
}
};
@@ -513,7 +513,7 @@ private:
// successors are reachable from a given terminator instruction.
//
void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
- SmallVector<bool, 16> &Succs) {
+ SmallVectorImpl<bool> &Succs) {
Succs.resize(TI.getNumSuccessors());
if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
if (BI->isUnconditional()) {
@@ -1604,7 +1604,7 @@ bool SCCP::runOnFunction(Function &F) {
Constant *Const = IV.isConstant()
? IV.getConstant() : UndefValue::get(Inst->getType());
- DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
+ DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst << '\n');
// Replaces all of the uses of a variable with uses of the constant.
Inst->replaceAllUsesWith(Const);
@@ -1812,7 +1812,7 @@ bool IPSCCP::runOnModule(Module &M) {
Constant *Const = IV.isConstant()
? IV.getConstant() : UndefValue::get(Inst->getType());
- DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
+ DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst << '\n');
// Replaces all of the uses of a variable with uses of the
// constant.
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index d073e78..9f3fc83 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -47,6 +47,7 @@
#include "llvm/InstVisitor.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -58,9 +59,9 @@ using namespace llvm;
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
-STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions");
-STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found");
-STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses");
+STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
+STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
+STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
@@ -110,17 +111,39 @@ typedef llvm::IRBuilder<false, ConstantFolder,
}
namespace {
-/// \brief A common base class for representing a half-open byte range.
-struct ByteRange {
+/// \brief A used slice of an alloca.
+///
+/// This structure represents a slice of an alloca used by some instruction. It
+/// stores both the begin and end offsets of this use, a pointer to the use
+/// itself, and a flag indicating whether we can classify the use as splittable
+/// or not when forming partitions of the alloca.
+class Slice {
/// \brief The beginning offset of the range.
uint64_t BeginOffset;
/// \brief The ending offset, not included in the range.
uint64_t EndOffset;
- ByteRange() : BeginOffset(), EndOffset() {}
- ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
- : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
+ /// \brief Storage for both the use of this slice and whether it can be
+ /// split.
+ PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
+
+public:
+ Slice() : BeginOffset(), EndOffset() {}
+ Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
+ : BeginOffset(BeginOffset), EndOffset(EndOffset),
+ UseAndIsSplittable(U, IsSplittable) {}
+
+ uint64_t beginOffset() const { return BeginOffset; }
+ uint64_t endOffset() const { return EndOffset; }
+
+ bool isSplittable() const { return UseAndIsSplittable.getInt(); }
+ void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
+
+ Use *getUse() const { return UseAndIsSplittable.getPointer(); }
+
+ bool isDead() const { return getUse() == 0; }
+ void kill() { UseAndIsSplittable.setPointer(0); }
/// \brief Support for ordering ranges.
///
@@ -128,173 +151,67 @@ struct ByteRange {
/// always increasing, and within equal start offsets, the end offsets are
/// decreasing. Thus the spanning range comes first in a cluster with the
/// same start position.
- bool operator<(const ByteRange &RHS) const {
- if (BeginOffset < RHS.BeginOffset) return true;
- if (BeginOffset > RHS.BeginOffset) return false;
- if (EndOffset > RHS.EndOffset) return true;
+ bool operator<(const Slice &RHS) const {
+ if (beginOffset() < RHS.beginOffset()) return true;
+ if (beginOffset() > RHS.beginOffset()) return false;
+ if (isSplittable() != RHS.isSplittable()) return !isSplittable();
+ if (endOffset() > RHS.endOffset()) return true;
return false;
}
/// \brief Support comparison with a single offset to allow binary searches.
- friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
- return LHS.BeginOffset < RHSOffset;
+ friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
+ uint64_t RHSOffset) {
+ return LHS.beginOffset() < RHSOffset;
}
-
friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
- const ByteRange &RHS) {
- return LHSOffset < RHS.BeginOffset;
+ const Slice &RHS) {
+ return LHSOffset < RHS.beginOffset();
}
- bool operator==(const ByteRange &RHS) const {
- return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
+ bool operator==(const Slice &RHS) const {
+ return isSplittable() == RHS.isSplittable() &&
+ beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
}
- bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
+ bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
};
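
A hedged illustration of the resulting sort order. This reuses the Slice class defined above; the null Use pointers and the offsets are purely illustrative:

SmallVector<Slice, 4> S;
S.push_back(Slice(0, 8, 0, /*IsSplittable=*/true));  // e.g. from a memset
S.push_back(Slice(0, 4, 0, /*IsSplittable=*/false)); // e.g. an i32 load
S.push_back(Slice(4, 8, 0, /*IsSplittable=*/false)); // e.g. an i32 load
std::sort(S.begin(), S.end());
// Sorted order: [0,4) unsplittable, [0,8) splittable, [4,8) unsplittable.
// Equal begin offsets place the unsplittable slice first.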
-
-/// \brief A partition of an alloca.
-///
-/// This structure represents a contiguous partition of the alloca. These are
-/// formed by examining the uses of the alloca. During formation, they may
-/// overlap but once an AllocaPartitioning is built, the Partitions within it
-/// are all disjoint.
-struct Partition : public ByteRange {
- /// \brief Whether this partition is splittable into smaller partitions.
- ///
- /// We flag partitions as splittable when they are formed entirely due to
- /// accesses by trivially splittable operations such as memset and memcpy.
- bool IsSplittable;
-
- /// \brief Test whether a partition has been marked as dead.
- bool isDead() const {
- if (BeginOffset == UINT64_MAX) {
- assert(EndOffset == UINT64_MAX);
- return true;
- }
- return false;
- }
-
- /// \brief Kill a partition.
- /// This is accomplished by setting both its beginning and end offset to
- /// the maximum possible value.
- void kill() {
- assert(!isDead() && "He's Dead, Jim!");
- BeginOffset = EndOffset = UINT64_MAX;
- }
-
- Partition() : ByteRange(), IsSplittable() {}
- Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
- : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
-};
-
-/// \brief A particular use of a partition of the alloca.
-///
-/// This structure is used to associate uses of a partition with it. They
-/// mark the range of bytes which are referenced by a particular instruction,
-/// and includes a handle to the user itself and the pointer value in use.
-/// The bounds of these uses are determined by intersecting the bounds of the
-/// memory use itself with a particular partition. As a consequence there is
-/// intentional overlap between various uses of the same partition.
-class PartitionUse : public ByteRange {
- /// \brief Combined storage for both the Use* and split state.
- PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit;
-
-public:
- PartitionUse() : ByteRange(), UsePtrAndIsSplit() {}
- PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
- bool IsSplit)
- : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {}
-
- /// \brief The use in question. Provides access to both user and used value.
- ///
- /// Note that this may be null if the partition use is *dead*, that is, it
- /// should be ignored.
- Use *getUse() const { return UsePtrAndIsSplit.getPointer(); }
-
- /// \brief Set the use for this partition use range.
- void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); }
-
- /// \brief Whether this use is split across multiple partitions.
- bool isSplit() const { return UsePtrAndIsSplit.getInt(); }
-};
-}
+} // end anonymous namespace
namespace llvm {
-template <> struct isPodLike<Partition> : llvm::true_type {};
-template <> struct isPodLike<PartitionUse> : llvm::true_type {};
+template <typename T> struct isPodLike;
+template <> struct isPodLike<Slice> {
+ static const bool value = true;
+};
}
namespace {
-/// \brief Alloca partitioning representation.
+/// \brief Representation of the alloca slices.
///
-/// This class represents a partitioning of an alloca into slices, and
-/// information about the nature of uses of each slice of the alloca. The goal
-/// is that this information is sufficient to decide if and how to split the
-/// alloca apart and replace slices with scalars. It is also intended that this
-/// structure can capture the relevant information needed both to decide about
-/// and to enact these transformations.
-class AllocaPartitioning {
+/// This class represents the slices of an alloca which are formed by its
+/// various uses. If a pointer escapes, we can't fully build a representation
+/// for the slices used and we reflect that in this structure. The uses are
+/// stored, sorted by increasing beginning offset and with unsplittable slices
+/// starting at a particular offset before splittable slices.
+class AllocaSlices {
public:
- /// \brief Construct a partitioning of a particular alloca.
- ///
- /// Construction does most of the work for partitioning the alloca. This
- /// performs the necessary walks of users and builds a partitioning from it.
- AllocaPartitioning(const DataLayout &TD, AllocaInst &AI);
+ /// \brief Construct the slices of a particular alloca.
+ AllocaSlices(const DataLayout &DL, AllocaInst &AI);
/// \brief Test whether a pointer to the allocation escapes our analysis.
///
- /// If this is true, the partitioning is never fully built and should be
+ /// If this is true, the slices are never fully built and should be
/// ignored.
bool isEscaped() const { return PointerEscapingInstr; }
- /// \brief Support for iterating over the partitions.
+ /// \brief Support for iterating over the slices.
/// @{
- typedef SmallVectorImpl<Partition>::iterator iterator;
- iterator begin() { return Partitions.begin(); }
- iterator end() { return Partitions.end(); }
+ typedef SmallVectorImpl<Slice>::iterator iterator;
+ iterator begin() { return Slices.begin(); }
+ iterator end() { return Slices.end(); }
- typedef SmallVectorImpl<Partition>::const_iterator const_iterator;
- const_iterator begin() const { return Partitions.begin(); }
- const_iterator end() const { return Partitions.end(); }
- /// @}
-
- /// \brief Support for iterating over and manipulating a particular
- /// partition's uses.
- ///
- /// The iteration support provided for uses is more limited, but also
- /// includes some manipulation routines to support rewriting the uses of
- /// partitions during SROA.
- /// @{
- typedef SmallVectorImpl<PartitionUse>::iterator use_iterator;
- use_iterator use_begin(unsigned Idx) { return Uses[Idx].begin(); }
- use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); }
- use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); }
- use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); }
-
- typedef SmallVectorImpl<PartitionUse>::const_iterator const_use_iterator;
- const_use_iterator use_begin(unsigned Idx) const { return Uses[Idx].begin(); }
- const_use_iterator use_begin(const_iterator I) const {
- return Uses[I - begin()].begin();
- }
- const_use_iterator use_end(unsigned Idx) const { return Uses[Idx].end(); }
- const_use_iterator use_end(const_iterator I) const {
- return Uses[I - begin()].end();
- }
-
- unsigned use_size(unsigned Idx) const { return Uses[Idx].size(); }
- unsigned use_size(const_iterator I) const { return Uses[I - begin()].size(); }
- const PartitionUse &getUse(unsigned PIdx, unsigned UIdx) const {
- return Uses[PIdx][UIdx];
- }
- const PartitionUse &getUse(const_iterator I, unsigned UIdx) const {
- return Uses[I - begin()][UIdx];
- }
-
- void use_push_back(unsigned Idx, const PartitionUse &PU) {
- Uses[Idx].push_back(PU);
- }
- void use_push_back(const_iterator I, const PartitionUse &PU) {
- Uses[I - begin()].push_back(PU);
- }
+ typedef SmallVectorImpl<Slice>::const_iterator const_iterator;
+ const_iterator begin() const { return Slices.begin(); }
+ const_iterator end() const { return Slices.end(); }
/// @}
/// \brief Allow iterating the dead users for this alloca.
@@ -320,66 +237,12 @@ public:
dead_op_iterator dead_op_end() const { return DeadOperands.end(); }
/// @}
- /// \brief MemTransferInst auxiliary data.
- /// This struct provides some auxiliary data about memory transfer
- /// intrinsics such as memcpy and memmove. These intrinsics can use two
- /// different ranges within the same alloca, and provide other challenges to
- /// correctly represent. We stash extra data to help us untangle this
- /// after the partitioning is complete.
- struct MemTransferOffsets {
- /// The destination begin and end offsets when the destination is within
- /// this alloca. If the end offset is zero the destination is not within
- /// this alloca.
- uint64_t DestBegin, DestEnd;
-
- /// The source begin and end offsets when the source is within this alloca.
- /// If the end offset is zero, the source is not within this alloca.
- uint64_t SourceBegin, SourceEnd;
-
- /// Flag for whether an alloca is splittable.
- bool IsSplittable;
- };
- MemTransferOffsets getMemTransferOffsets(MemTransferInst &II) const {
- return MemTransferInstData.lookup(&II);
- }
-
- /// \brief Map from a PHI or select operand back to a partition.
- ///
- /// When manipulating PHI nodes or selects, they can use more than one
- /// partition of an alloca. We store a special mapping to allow finding the
- /// partition referenced by each of these operands, if any.
- iterator findPartitionForPHIOrSelectOperand(Use *U) {
- SmallDenseMap<Use *, std::pair<unsigned, unsigned> >::const_iterator MapIt
- = PHIOrSelectOpMap.find(U);
- if (MapIt == PHIOrSelectOpMap.end())
- return end();
-
- return begin() + MapIt->second.first;
- }
-
- /// \brief Map from a PHI or select operand back to the specific use of
- /// a partition.
- ///
- /// Similar to mapping these operands back to the partitions, this maps
- /// directly to the use structure of that partition.
- use_iterator findPartitionUseForPHIOrSelectOperand(Use *U) {
- SmallDenseMap<Use *, std::pair<unsigned, unsigned> >::const_iterator MapIt
- = PHIOrSelectOpMap.find(U);
- assert(MapIt != PHIOrSelectOpMap.end());
- return Uses[MapIt->second.first].begin() + MapIt->second.second;
- }
-
- /// \brief Compute a common type among the uses of a particular partition.
- ///
- /// This routines walks all of the uses of a particular partition and tries
- /// to find a common type between them. Untyped operations such as memset and
- /// memcpy are ignored.
- Type *getCommonType(iterator I) const;
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
- void printUsers(raw_ostream &OS, const_iterator I,
+ void printSlice(raw_ostream &OS, const_iterator I,
StringRef Indent = " ") const;
+ void printUse(raw_ostream &OS, const_iterator I,
+ StringRef Indent = " ") const;
void print(raw_ostream &OS) const;
void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump(const_iterator I) const;
void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump() const;
@@ -387,47 +250,36 @@ public:
private:
template <typename DerivedT, typename RetT = void> class BuilderBase;
- class PartitionBuilder;
- friend class AllocaPartitioning::PartitionBuilder;
- class UseBuilder;
- friend class AllocaPartitioning::UseBuilder;
+ class SliceBuilder;
+ friend class AllocaSlices::SliceBuilder;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Handle to alloca instruction to simplify method interfaces.
AllocaInst &AI;
#endif
- /// \brief The instruction responsible for this alloca having no partitioning.
+ /// \brief The instruction responsible for this alloca not having a known set
+ /// of slices.
///
/// When an instruction (potentially) escapes the pointer to the alloca, we
- /// store a pointer to that here and abort trying to partition the alloca.
- /// This will be null if the alloca is partitioned successfully.
+ /// store a pointer to that here and abort trying to form slices of the
+ /// alloca. This will be null if the alloca slices are analyzed successfully.
Instruction *PointerEscapingInstr;
- /// \brief The partitions of the alloca.
+ /// \brief The slices of the alloca.
///
- /// We store a vector of the partitions over the alloca here. This vector is
- /// sorted by increasing begin offset, and then by decreasing end offset. See
- /// the Partition inner class for more details. Initially (during
- /// construction) there are overlaps, but we form a disjoint sequence of
- /// partitions while finishing construction and a fully constructed object is
- /// expected to always have this as a disjoint space.
- SmallVector<Partition, 8> Partitions;
-
- /// \brief The uses of the partitions.
- ///
- /// This is essentially a mapping from each partition to a list of uses of
- /// that partition. The mapping is done with a Uses vector that has the exact
- /// same number of entries as the partition vector. Each entry is itself
- /// a vector of the uses.
- SmallVector<SmallVector<PartitionUse, 2>, 8> Uses;
+ /// We store a vector of the slices formed by uses of the alloca here. This
+ /// vector is sorted by increasing begin offset, and then the unsplittable
+ /// slices before the splittable ones. See the Slice inner class for more
+ /// details.
+ SmallVector<Slice, 8> Slices;
/// \brief Instructions which will become dead if we rewrite the alloca.
///
- /// Note that these are not separated by partition. This is because we expect
- /// a partitioned alloca to be completely rewritten or not rewritten at all.
- /// If rewritten, all these instructions can simply be removed and replaced
- /// with undef as they come from outside of the allocated space.
+ /// Note that these are not separated by slice. This is because we expect an
+ /// alloca to be completely rewritten or not rewritten at all. If rewritten,
+ /// all these instructions can simply be removed and replaced with undef as
+ /// they come from outside of the allocated space.
SmallVector<Instruction *, 8> DeadUsers;
/// \brief Operands which will become dead if we rewrite the alloca.
@@ -439,26 +291,6 @@ private:
/// want to swap this particular input for undef to simplify the use lists of
/// the alloca.
SmallVector<Use *, 8> DeadOperands;
-
- /// \brief The underlying storage for auxiliary memcpy and memset info.
- SmallDenseMap<MemTransferInst *, MemTransferOffsets, 4> MemTransferInstData;
-
- /// \brief A side datastructure used when building up the partitions and uses.
- ///
- /// This mapping is only really used during the initial building of the
- /// partitioning so that we can retain information about PHI and select nodes
- /// processed.
- SmallDenseMap<Instruction *, std::pair<uint64_t, bool> > PHIOrSelectSizes;
-
- /// \brief Auxiliary information for particular PHI or select operands.
- SmallDenseMap<Use *, std::pair<unsigned, unsigned>, 4> PHIOrSelectOpMap;
-
- /// \brief A utility routine called from the constructor.
- ///
- /// This does what it says on the tin. It is the key of the alloca partition
- /// splitting and merging. After it is called we have the desired disjoint
- /// collection of partitions.
- void splitAndMergePartitions();
};
}
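
A hedged sketch of how a client drives the new interface; DL and AI are assumed to be a DataLayout and an AllocaInst already in scope:

AllocaSlices S(DL, AI);
if (!S.isEscaped())
  for (AllocaSlices::const_iterator I = S.begin(), E = S.end(); I != E; ++I)
    dbgs() << "slice [" << I->beginOffset() << "," << I->endOffset() << ")"
           << (I->isSplittable() ? " (splittable)" : "") << "\n";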
@@ -474,29 +306,35 @@ static Value *foldSelectInst(SelectInst &SI) {
return 0;
}
-/// \brief Builder for the alloca partitioning.
+/// \brief Builder for the alloca slices.
///
-/// This class builds an alloca partitioning by recursively visiting the uses
-/// of an alloca and splitting the partitions for each load and store at each
-/// offset.
-class AllocaPartitioning::PartitionBuilder
- : public PtrUseVisitor<PartitionBuilder> {
- friend class PtrUseVisitor<PartitionBuilder>;
- friend class InstVisitor<PartitionBuilder>;
- typedef PtrUseVisitor<PartitionBuilder> Base;
+/// This class builds a set of alloca slices by recursively visiting the uses
+/// of an alloca and making a slice for each load and store at each offset.
+class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
+ friend class PtrUseVisitor<SliceBuilder>;
+ friend class InstVisitor<SliceBuilder>;
+ typedef PtrUseVisitor<SliceBuilder> Base;
const uint64_t AllocSize;
- AllocaPartitioning &P;
+ AllocaSlices &S;
+
+ SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
+ SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
- SmallDenseMap<Instruction *, unsigned> MemTransferPartitionMap;
+ /// \brief Set to de-duplicate dead instructions found in the use walk.
+ SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
public:
- PartitionBuilder(const DataLayout &DL, AllocaInst &AI, AllocaPartitioning &P)
- : PtrUseVisitor<PartitionBuilder>(DL),
- AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())),
- P(P) {}
+ SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &S)
+ : PtrUseVisitor<SliceBuilder>(DL),
+ AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), S(S) {}
private:
+ void markAsDead(Instruction &I) {
+ if (VisitedDeadInsts.insert(&I))
+ S.DeadUsers.push_back(&I);
+ }
+
void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
bool IsSplittable = false) {
// Completely skip uses which have a zero size or start either before or
@@ -505,9 +343,9 @@ private:
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
<< " which has zero size or starts outside of the "
<< AllocSize << " byte alloca:\n"
- << " alloca: " << P.AI << "\n"
+ << " alloca: " << S.AI << "\n"
<< " use: " << I << "\n");
- return;
+ return markAsDead(I);
}
uint64_t BeginOffset = Offset.getZExtValue();
@@ -523,13 +361,26 @@ private:
if (Size > AllocSize - BeginOffset) {
DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
<< " to remain within the " << AllocSize << " byte alloca:\n"
- << " alloca: " << P.AI << "\n"
+ << " alloca: " << S.AI << "\n"
<< " use: " << I << "\n");
EndOffset = AllocSize;
}
- Partition New(BeginOffset, EndOffset, IsSplittable);
- P.Partitions.push_back(New);
+ S.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
+ }
+
+ void visitBitCastInst(BitCastInst &BC) {
+ if (BC.use_empty())
+ return markAsDead(BC);
+
+ return Base::visitBitCastInst(BC);
+ }
+
+ void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ if (GEPI.use_empty())
+ return markAsDead(GEPI);
+
+ return Base::visitGetElementPtrInst(GEPI);
}
void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
@@ -580,9 +431,9 @@ private:
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
<< " which extends past the end of the " << AllocSize
<< " byte alloca:\n"
- << " alloca: " << P.AI << "\n"
+ << " alloca: " << S.AI << "\n"
<< " use: " << SI << "\n");
- return;
+ return markAsDead(SI);
}
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
@@ -597,7 +448,7 @@ private:
if ((Length && Length->getValue() == 0) ||
(IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
// Zero-length mem transfer intrinsics can be ignored entirely.
- return;
+ return markAsDead(II);
if (!IsOffsetKnown)
return PI.setAborted(&II);
@@ -613,7 +464,7 @@ private:
if ((Length && Length->getValue() == 0) ||
(IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
// Zero-length mem transfer intrinsics can be ignored entirely.
- return;
+ return markAsDead(II);
if (!IsOffsetKnown)
return PI.setAborted(&II);
@@ -622,63 +473,44 @@ private:
uint64_t Size = Length ? Length->getLimitedValue()
: AllocSize - RawOffset;
- MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
-
- // Only intrinsics with a constant length can be split.
- Offsets.IsSplittable = Length;
+ // Check for the special case where the same exact value is used for both
+ // source and dest.
+ if (*U == II.getRawDest() && *U == II.getRawSource()) {
+ // For non-volatile transfers this is a no-op.
+ if (!II.isVolatile())
+ return markAsDead(II);
- if (*U == II.getRawDest()) {
- Offsets.DestBegin = RawOffset;
- Offsets.DestEnd = RawOffset + Size;
- }
- if (*U == II.getRawSource()) {
- Offsets.SourceBegin = RawOffset;
- Offsets.SourceEnd = RawOffset + Size;
+ return insertUse(II, Offset, Size, /*IsSplittable=*/false);
}
- // If we have set up end offsets for both the source and the destination,
- // we have found both sides of this transfer pointing at the same alloca.
- bool SeenBothEnds = Offsets.SourceEnd && Offsets.DestEnd;
- if (SeenBothEnds && II.getRawDest() != II.getRawSource()) {
- unsigned PrevIdx = MemTransferPartitionMap[&II];
+ // If we have seen both source and destination for a mem transfer, then
+ // they both point to the same alloca.
+ bool Inserted;
+ SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
+ llvm::tie(MTPI, Inserted) =
+ MemTransferSliceMap.insert(std::make_pair(&II, S.Slices.size()));
+ unsigned PrevIdx = MTPI->second;
+ if (!Inserted) {
+ Slice &PrevP = S.Slices[PrevIdx];
// Check if the begin offsets match and this is a non-volatile transfer.
// In that case, we can completely elide the transfer.
- if (!II.isVolatile() && Offsets.SourceBegin == Offsets.DestBegin) {
- P.Partitions[PrevIdx].kill();
- return;
+ if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
+ PrevP.kill();
+ return markAsDead(II);
}
// Otherwise we have an offset transfer within the same alloca. We can't
// split those.
- P.Partitions[PrevIdx].IsSplittable = Offsets.IsSplittable = false;
- } else if (SeenBothEnds) {
- // Handle the case where this exact use provides both ends of the
- // operation.
- assert(II.getRawDest() == II.getRawSource());
-
- // For non-volatile transfers this is a no-op.
- if (!II.isVolatile())
- return;
-
- // Otherwise just suppress splitting.
- Offsets.IsSplittable = false;
+ PrevP.makeUnsplittable();
}
-
// Insert the use now that we've fixed up the splittable nature.
- insertUse(II, Offset, Size, Offsets.IsSplittable);
-
- // Set up the mapping from intrinsic to partition if we've not seen both
- // ends of this transfer.
- if (!SeenBothEnds) {
- unsigned NewIdx = P.Partitions.size() - 1;
- bool Inserted
- = MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx)).second;
- assert(Inserted &&
- "Already have intrinsic in map but haven't seen both ends");
- (void)Inserted;
- }
+ insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
+
+ // Check that we ended up with a valid index in the map.
+ assert(S.Slices[PrevIdx].getUse()->getUser() == &II &&
+ "Map index doesn't point back to a slice with this user.");
}
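
The map handling above leans on insert() doing double duty: it creates the entry on first sight and reports, through the returned bool, whether the key was new. A stand-alone sketch of the idiom with a hypothetical helper:

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instruction.h"
#include <utility>
using namespace llvm;

void rememberFirstIndex(Instruction *I, unsigned Idx,
                        SmallDenseMap<Instruction *, unsigned> &Seen) {
  bool Inserted;
  SmallDenseMap<Instruction *, unsigned>::iterator It;
  llvm::tie(It, Inserted) = Seen.insert(std::make_pair(I, Idx));
  if (!Inserted) {
    // Second sighting: It->second still holds the index recorded the first
    // time, which is how visitMemTransferInst locates the earlier slice.
  }
}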
// Disable SRoA for any intrinsics except for lifetime invariants.
@@ -702,7 +534,7 @@ private:
Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
// We consider any PHI or select that results in a direct load or store of
- // the same offset to be a viable use for partitioning purposes. These uses
+ // the same offset to be a viable use for slicing purposes. These uses
// are considered unsplittable and the size is the maximum loaded or stored
// size.
SmallPtrSet<Instruction *, 4> Visited;
@@ -747,234 +579,36 @@ private:
void visitPHINode(PHINode &PN) {
if (PN.use_empty())
- return;
+ return markAsDead(PN);
if (!IsOffsetKnown)
return PI.setAborted(&PN);
// See if we already have computed info on this node.
- std::pair<uint64_t, bool> &PHIInfo = P.PHIOrSelectSizes[&PN];
- if (PHIInfo.first) {
- PHIInfo.second = true;
- insertUse(PN, Offset, PHIInfo.first);
- return;
+ uint64_t &PHISize = PHIOrSelectSizes[&PN];
+ if (!PHISize) {
+ // This is a new PHI node, check for an unsafe use of the PHI node.
+ if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&PN, PHISize))
+ return PI.setAborted(UnsafeI);
}
- // Check for an unsafe use of the PHI node.
- if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first))
- return PI.setAborted(UnsafeI);
-
- insertUse(PN, Offset, PHIInfo.first);
- }
-
- void visitSelectInst(SelectInst &SI) {
- if (SI.use_empty())
- return;
- if (Value *Result = foldSelectInst(SI)) {
- if (Result == *U)
- // If the result of the constant fold will be the pointer, recurse
- // through the select as if we had RAUW'ed it.
- enqueueUsers(SI);
-
- return;
- }
- if (!IsOffsetKnown)
- return PI.setAborted(&SI);
-
- // See if we already have computed info on this node.
- std::pair<uint64_t, bool> &SelectInfo = P.PHIOrSelectSizes[&SI];
- if (SelectInfo.first) {
- SelectInfo.second = true;
- insertUse(SI, Offset, SelectInfo.first);
- return;
- }
-
- // Check for an unsafe use of the PHI node.
- if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first))
- return PI.setAborted(UnsafeI);
-
- insertUse(SI, Offset, SelectInfo.first);
- }
-
- /// \brief Disable SROA entirely if there are unhandled users of the alloca.
- void visitInstruction(Instruction &I) {
- PI.setAborted(&I);
- }
-};
-
-/// \brief Use adder for the alloca partitioning.
-///
-/// This class adds the uses of an alloca to all of the partitions which they
-/// use. For splittable partitions, this can end up doing essentially a linear
-/// walk of the partitions, but the number of steps remains bounded by the
-/// total result instruction size:
-/// - The number of partitions is a result of the number of unsplittable
-/// instructions using the alloca.
-/// - The number of users of each partition is at worst the total number of
-/// splittable instructions using the alloca.
-/// Thus we will produce N * M instructions in the end, where N are the number
-/// of unsplittable uses and M are the number of splittable. This visitor does
-/// the exact same number of updates to the partitioning.
-///
-/// In the more common case, this visitor will leverage the fact that the
-/// partition space is pre-sorted, and do a logarithmic search for the
-/// partition needed, making the total visit a classical ((N + M) * log(N))
-/// complexity operation.
-class AllocaPartitioning::UseBuilder : public PtrUseVisitor<UseBuilder> {
- friend class PtrUseVisitor<UseBuilder>;
- friend class InstVisitor<UseBuilder>;
- typedef PtrUseVisitor<UseBuilder> Base;
-
- const uint64_t AllocSize;
- AllocaPartitioning &P;
-
- /// \brief Set to de-duplicate dead instructions found in the use walk.
- SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
-
-public:
- UseBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
- : PtrUseVisitor<UseBuilder>(TD),
- AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())),
- P(P) {}
-
-private:
- void markAsDead(Instruction &I) {
- if (VisitedDeadInsts.insert(&I))
- P.DeadUsers.push_back(&I);
- }
-
- void insertUse(Instruction &User, const APInt &Offset, uint64_t Size) {
- // If the use has a zero size or extends outside of the allocation, record
- // it as a dead use for elimination later.
- if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize))
- return markAsDead(User);
-
- uint64_t BeginOffset = Offset.getZExtValue();
- uint64_t EndOffset = BeginOffset + Size;
-
- // Clamp the end offset to the end of the allocation. Note that this is
- // formulated to handle even the case where "BeginOffset + Size" overflows.
- assert(AllocSize >= BeginOffset); // Established above.
- if (Size > AllocSize - BeginOffset)
- EndOffset = AllocSize;
-
- // NB: This only works if we have zero overlapping partitions.
- iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset);
- if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset)
- I = llvm::prior(I);
- iterator E = P.end();
- bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset;
- for (; I != E && I->BeginOffset < EndOffset; ++I) {
- PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
- std::min(I->EndOffset, EndOffset), U, IsSplit);
- P.use_push_back(I, NewPU);
- if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
- P.PHIOrSelectOpMap[U]
- = std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1);
- }
- }
-
- void visitBitCastInst(BitCastInst &BC) {
- if (BC.use_empty())
- return markAsDead(BC);
-
- return Base::visitBitCastInst(BC);
- }
-
- void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
- if (GEPI.use_empty())
- return markAsDead(GEPI);
-
- return Base::visitGetElementPtrInst(GEPI);
- }
-
- void visitLoadInst(LoadInst &LI) {
- assert(IsOffsetKnown);
- uint64_t Size = DL.getTypeStoreSize(LI.getType());
- insertUse(LI, Offset, Size);
- }
-
- void visitStoreInst(StoreInst &SI) {
- assert(IsOffsetKnown);
- uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of the allocation, its behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size))
- return markAsDead(SI);
-
- insertUse(SI, Offset, Size);
- }
-
- void visitMemSetInst(MemSetInst &II) {
- ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
- if ((Length && Length->getValue() == 0) ||
- (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
- return markAsDead(II);
-
- assert(IsOffsetKnown);
- insertUse(II, Offset, Length ? Length->getLimitedValue()
- : AllocSize - Offset.getLimitedValue());
- }
-
- void visitMemTransferInst(MemTransferInst &II) {
- ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
- if ((Length && Length->getValue() == 0) ||
- (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
- return markAsDead(II);
-
- assert(IsOffsetKnown);
- uint64_t Size = Length ? Length->getLimitedValue()
- : AllocSize - Offset.getLimitedValue();
-
- const MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
- if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd &&
- Offsets.DestBegin == Offsets.SourceBegin)
- return markAsDead(II); // Skip identity transfers without side-effects.
-
- insertUse(II, Offset, Size);
- }
-
- void visitIntrinsicInst(IntrinsicInst &II) {
- assert(IsOffsetKnown);
- assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
- II.getIntrinsicID() == Intrinsic::lifetime_end);
-
- ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
- insertUse(II, Offset, std::min(Length->getLimitedValue(),
- AllocSize - Offset.getLimitedValue()));
- }
-
- void insertPHIOrSelect(Instruction &User, const APInt &Offset) {
- uint64_t Size = P.PHIOrSelectSizes.lookup(&User).first;
-
// For PHI and select operands outside the alloca, we can't nuke the entire
// phi or select -- the other side might still be relevant, so we special
// case them here and use a separate structure to track the operands
// themselves which should be replaced with undef.
- if ((Offset.isNegative() && Offset.uge(Size)) ||
+ // FIXME: This should instead be escaped in the event we're instrumenting
+ // for address sanitization.
+ if ((Offset.isNegative() && (-Offset).uge(PHISize)) ||
(!Offset.isNegative() && Offset.uge(AllocSize))) {
- P.DeadOperands.push_back(U);
+ S.DeadOperands.push_back(U);
return;
}
- insertUse(User, Offset, Size);
- }
-
- void visitPHINode(PHINode &PN) {
- if (PN.use_empty())
- return markAsDead(PN);
-
- assert(IsOffsetKnown);
- insertPHIOrSelect(PN, Offset);
+ insertUse(PN, Offset, PHISize);
}
void visitSelectInst(SelectInst &SI) {
if (SI.use_empty())
return markAsDead(SI);
-
if (Value *Result = foldSelectInst(SI)) {
if (Result == *U)
// If the result of the constant fold will be the pointer, recurse
@@ -983,276 +617,106 @@ private:
else
// Otherwise the operand to the select is dead, and we can replace it
// with undef.
- P.DeadOperands.push_back(U);
+ S.DeadOperands.push_back(U);
return;
}
+ if (!IsOffsetKnown)
+ return PI.setAborted(&SI);
- assert(IsOffsetKnown);
- insertPHIOrSelect(SI, Offset);
- }
-
- /// \brief Unreachable, we've already visited the alloca once.
- void visitInstruction(Instruction &I) {
- llvm_unreachable("Unhandled instruction in use builder.");
- }
-};
-
-void AllocaPartitioning::splitAndMergePartitions() {
- size_t NumDeadPartitions = 0;
-
- // Track the range of splittable partitions that we pass when accumulating
- // overlapping unsplittable partitions.
- uint64_t SplitEndOffset = 0ull;
-
- Partition New(0ull, 0ull, false);
-
- for (unsigned i = 0, j = i, e = Partitions.size(); i != e; i = j) {
- ++j;
-
- if (!Partitions[i].IsSplittable || New.BeginOffset == New.EndOffset) {
- assert(New.BeginOffset == New.EndOffset);
- New = Partitions[i];
- } else {
- assert(New.IsSplittable);
- New.EndOffset = std::max(New.EndOffset, Partitions[i].EndOffset);
- }
- assert(New.BeginOffset != New.EndOffset);
-
- // Scan the overlapping partitions.
- while (j != e && New.EndOffset > Partitions[j].BeginOffset) {
- // If the new partition we are forming is splittable, stop at the first
- // unsplittable partition.
- if (New.IsSplittable && !Partitions[j].IsSplittable)
- break;
-
- // Grow the new partition to include any equally splittable range. 'j' is
- // always equally splittable when New is splittable, but when New is not
- // splittable, we may subsume some (or part of some) splittable partition
- // without growing the new one.
- if (New.IsSplittable == Partitions[j].IsSplittable) {
- New.EndOffset = std::max(New.EndOffset, Partitions[j].EndOffset);
- } else {
- assert(!New.IsSplittable);
- assert(Partitions[j].IsSplittable);
- SplitEndOffset = std::max(SplitEndOffset, Partitions[j].EndOffset);
- }
-
- Partitions[j].kill();
- ++NumDeadPartitions;
- ++j;
- }
-
- // If the new partition is splittable, chop off the end as soon as the
- // unsplittable subsequent partition starts and ensure we eventually cover
- // the splittable area.
- if (j != e && New.IsSplittable) {
- SplitEndOffset = std::max(SplitEndOffset, New.EndOffset);
- New.EndOffset = std::min(New.EndOffset, Partitions[j].BeginOffset);
+ // See if we already have computed info on this node.
+ uint64_t &SelectSize = PHIOrSelectSizes[&SI];
+ if (!SelectSize) {
+ // This is a new Select, check for an unsafe use of it.
+ if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&SI, SelectSize))
+ return PI.setAborted(UnsafeI);
}
- // Add the new partition if it differs from the original one and is
- // non-empty. We can end up with an empty partition here if it was
- // splittable but there is an unsplittable one that starts at the same
- // offset.
- if (New != Partitions[i]) {
- if (New.BeginOffset != New.EndOffset)
- Partitions.push_back(New);
- // Mark the old one for removal.
- Partitions[i].kill();
- ++NumDeadPartitions;
+ // For PHI and select operands outside the alloca, we can't nuke the entire
+ // phi or select -- the other side might still be relevant, so we special
+ // case them here and use a separate structure to track the operands
+ // themselves which should be replaced with undef.
+ // FIXME: This should instead be escaped in the event we're instrumenting
+ // for address sanitization.
+ if ((Offset.isNegative() && Offset.uge(SelectSize)) ||
+ (!Offset.isNegative() && Offset.uge(AllocSize))) {
+ S.DeadOperands.push_back(U);
+ return;
}
- New.BeginOffset = New.EndOffset;
- if (!New.IsSplittable) {
- New.EndOffset = std::max(New.EndOffset, SplitEndOffset);
- if (j != e && !Partitions[j].IsSplittable)
- New.EndOffset = std::min(New.EndOffset, Partitions[j].BeginOffset);
- New.IsSplittable = true;
- // If there is a trailing splittable partition which won't be fused into
- // the next splittable partition go ahead and add it onto the partitions
- // list.
- if (New.BeginOffset < New.EndOffset &&
- (j == e || !Partitions[j].IsSplittable ||
- New.EndOffset < Partitions[j].BeginOffset)) {
- Partitions.push_back(New);
- New.BeginOffset = New.EndOffset = 0ull;
- }
- }
+ insertUse(SI, Offset, SelectSize);
}
- // Re-sort the partitions now that they have been split and merged into a
- // disjoint set of partitions. Also remove any of the dead partitions we've
- // replaced in the process.
- std::sort(Partitions.begin(), Partitions.end());
- if (NumDeadPartitions) {
- assert(Partitions.back().isDead());
- assert((ptrdiff_t)NumDeadPartitions ==
- std::count(Partitions.begin(), Partitions.end(), Partitions.back()));
+ /// \brief Disable SROA entirely if there are unhandled users of the alloca.
+ void visitInstruction(Instruction &I) {
+ PI.setAborted(&I);
}
- Partitions.erase(Partitions.end() - NumDeadPartitions, Partitions.end());
-}
+};
-AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
+AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AI(AI),
#endif
PointerEscapingInstr(0) {
- PartitionBuilder PB(TD, AI, *this);
- PartitionBuilder::PtrInfo PtrI = PB.visitPtr(AI);
+ SliceBuilder PB(DL, AI, *this);
+ SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
if (PtrI.isEscaped() || PtrI.isAborted()) {
// FIXME: We should sink the escape vs. abort info into the caller nicely,
- // possibly by just storing the PtrInfo in the AllocaPartitioning.
+ // possibly by just storing the PtrInfo in the AllocaSlices.
PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
: PtrI.getAbortingInst();
assert(PointerEscapingInstr && "Did not track a bad instruction");
return;
}
+ Slices.erase(std::remove_if(Slices.begin(), Slices.end(),
+ std::mem_fun_ref(&Slice::isDead)),
+ Slices.end());
+
// Sort the uses. This arranges for the offsets to be in ascending order,
// and the sizes to be in descending order.
- std::sort(Partitions.begin(), Partitions.end());
-
- // Remove any partitions from the back which are marked as dead.
- while (!Partitions.empty() && Partitions.back().isDead())
- Partitions.pop_back();
-
- if (Partitions.size() > 1) {
- // Intersect splittability for all partitions with equal offsets and sizes.
- // Then remove all but the first so that we have a sequence of non-equal but
- // potentially overlapping partitions.
- for (iterator I = Partitions.begin(), J = I, E = Partitions.end(); I != E;
- I = J) {
- ++J;
- while (J != E && *I == *J) {
- I->IsSplittable &= J->IsSplittable;
- ++J;
- }
- }
- Partitions.erase(std::unique(Partitions.begin(), Partitions.end()),
- Partitions.end());
-
- // Split splittable and merge unsplittable partitions into a disjoint set
- // of partitions over the used space of the allocation.
- splitAndMergePartitions();
- }
-
- // Record how many partitions we end up with.
- NumAllocaPartitions += Partitions.size();
- MaxPartitionsPerAlloca = std::max<unsigned>(Partitions.size(), MaxPartitionsPerAlloca);
-
- // Now build up the user lists for each of these disjoint partitions by
- // re-walking the recursive users of the alloca.
- Uses.resize(Partitions.size());
- UseBuilder UB(TD, AI, *this);
- PtrI = UB.visitPtr(AI);
- assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!");
- assert(!PtrI.isAborted() && "Early aborted the visit of the pointer.");
-
- unsigned NumUses = 0;
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
- for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx)
- NumUses += Uses[Idx].size();
-#endif
- NumAllocaPartitionUses += NumUses;
- MaxPartitionUsesPerAlloca = std::max<unsigned>(NumUses, MaxPartitionUsesPerAlloca);
+ std::sort(Slices.begin(), Slices.end());
}
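
The dead-slice cleanup in the constructor is the classic erase-remove idiom; a minimal stand-alone sketch with plain ints (dropZeros is hypothetical):

#include <algorithm>
#include <functional>
#include <vector>

void dropZeros(std::vector<int> &V) {
  // remove_if compacts the surviving elements to the front and returns the
  // new logical end; erase then trims the dead tail in a single call.
  V.erase(std::remove_if(V.begin(), V.end(),
                         std::bind2nd(std::equal_to<int>(), 0)),
          V.end());
}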
-Type *AllocaPartitioning::getCommonType(iterator I) const {
- Type *Ty = 0;
- for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- Use *U = UI->getUse();
- if (!U)
- continue; // Skip dead uses.
- if (isa<IntrinsicInst>(*U->getUser()))
- continue;
- if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
- continue;
-
- Type *UserTy = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
- UserTy = LI->getType();
- else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
- UserTy = SI->getValueOperand()->getType();
- else
- return 0; // Bail if we have weird uses.
-
- if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
- // If the type is larger than the partition, skip it. We only encounter
- // this for split integer operations where we want to use the type of the
- // entity causing the split.
- if (ITy->getBitWidth() > (I->EndOffset - I->BeginOffset)*8)
- continue;
-
- // If we have found an integer type use covering the alloca, use that
- // regardless of the other types, as integers are often used for a "bucket
- // of bits" type.
- return ITy;
- }
-
- if (Ty && Ty != UserTy)
- return 0;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- Ty = UserTy;
- }
- return Ty;
+void AllocaSlices::print(raw_ostream &OS, const_iterator I,
+ StringRef Indent) const {
+ printSlice(OS, I, Indent);
+ printUse(OS, I, Indent);
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-
-void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
- StringRef Indent) const {
- OS << Indent << "partition #" << (I - begin())
- << " [" << I->BeginOffset << "," << I->EndOffset << ")"
- << (I->IsSplittable ? " (splittable)" : "")
- << (Uses[I - begin()].empty() ? " (zero uses)" : "")
- << "\n";
+void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
+ StringRef Indent) const {
+ OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
+ << " slice #" << (I - begin())
+ << (I->isSplittable() ? " (splittable)" : "") << "\n";
}
-void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
- StringRef Indent) const {
- for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->getUse())
- continue; // Skip dead uses.
- OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") "
- << "used by: " << *UI->getUse()->getUser() << "\n";
- if (MemTransferInst *II =
- dyn_cast<MemTransferInst>(UI->getUse()->getUser())) {
- const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
- bool IsDest;
- if (!MTO.IsSplittable)
- IsDest = UI->BeginOffset == MTO.DestBegin;
- else
- IsDest = MTO.DestBegin != 0u;
- OS << Indent << " (original " << (IsDest ? "dest" : "source") << ": "
- << "[" << (IsDest ? MTO.DestBegin : MTO.SourceBegin)
- << "," << (IsDest ? MTO.DestEnd : MTO.SourceEnd) << ")\n";
- }
- }
+void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
+ StringRef Indent) const {
+ OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
}
-void AllocaPartitioning::print(raw_ostream &OS) const {
+void AllocaSlices::print(raw_ostream &OS) const {
if (PointerEscapingInstr) {
- OS << "No partitioning for alloca: " << AI << "\n"
+ OS << "Can't analyze slices for alloca: " << AI << "\n"
<< " A pointer to this alloca escaped by:\n"
<< " " << *PointerEscapingInstr << "\n";
return;
}
- OS << "Partitioning of alloca: " << AI << "\n";
- for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ OS << "Slices of alloca: " << AI << "\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
print(OS, I);
- printUsers(OS, I);
- }
}
-void AllocaPartitioning::dump(const_iterator I) const { print(dbgs(), I); }
-void AllocaPartitioning::dump() const { print(dbgs()); }
+void AllocaSlices::dump(const_iterator I) const { print(dbgs(), I); }
+void AllocaSlices::dump() const { print(dbgs()); }
#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-
namespace {
/// \brief Implementation of LoadAndStorePromoter for promoting allocas.
///
@@ -1269,12 +733,13 @@ class AllocaPromoter : public LoadAndStorePromoter {
SmallVector<DbgValueInst *, 4> DVIs;
public:
- AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ AllocaPromoter(const SmallVectorImpl<Instruction *> &Insts, SSAUpdater &S,
AllocaInst &AI, DIBuilder &DIB)
- : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
+ : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
void run(const SmallVectorImpl<Instruction*> &Insts) {
- // Remember which alloca we're promoting (for isInstInList).
+ // Retain the debug information attached to the alloca for use when
+ // rewriting loads and stores.
if (MDNode *DebugNode = MDNode::getIfExists(AI.getContext(), &AI)) {
for (Value::use_iterator UI = DebugNode->use_begin(),
UE = DebugNode->use_end();
@@ -1286,7 +751,9 @@ public:
}
LoadAndStorePromoter::run(Insts);
- AI.eraseFromParent();
+
+ // While we have the debug information, clear it off of the alloca. The
+ // caller takes care of deleting the alloca.
while (!DDIs.empty())
DDIs.pop_back_val()->eraseFromParent();
while (!DVIs.empty())
@@ -1295,13 +762,34 @@ public:
virtual bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &Insts) const {
+ Value *Ptr;
if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return LI->getOperand(0) == &AI;
- return cast<StoreInst>(I)->getPointerOperand() == &AI;
+ Ptr = LI->getOperand(0);
+ else
+ Ptr = cast<StoreInst>(I)->getPointerOperand();
+
+ // Only used to detect cycles, which will be rare and quickly found as
+ // we're walking up a chain of defs rather than down through uses.
+ SmallPtrSet<Value *, 4> Visited;
+
+ do {
+ if (Ptr == &AI)
+ return true;
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr))
+ Ptr = BCI->getOperand(0);
+ else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
+ Ptr = GEPI->getPointerOperand();
+ else
+ return false;
+
+ } while (Visited.insert(Ptr));
+
+ return false;
}
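
The SmallPtrSet guard exists because bitcast/GEP chains can form cycles in unreachable code, and insert() returning false on a revisit terminates the walk. A hedged stand-alone sketch of the same pattern; reachesBase is hypothetical:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Returns true if Ptr is Base or a bitcast/GEP chain rooted at Base.
static bool reachesBase(Value *Ptr, Value *Base) {
  SmallPtrSet<Value *, 4> Visited;
  do {
    if (Ptr == Base)
      return true;
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr))
      Ptr = BCI->getOperand(0);
    else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
      Ptr = GEPI->getPointerOperand();
    else
      return false;
  } while (Visited.insert(Ptr)); // insert() is false on a revisit: a cycle
  return false;                  // walked into a cycle without hitting Base
}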
virtual void updateDebugInfo(Instruction *Inst) const {
- for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
+ for (SmallVectorImpl<DbgDeclareInst *>::const_iterator I = DDIs.begin(),
E = DDIs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
@@ -1309,7 +797,7 @@ public:
else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
}
- for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
+ for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
Value *Arg = 0;
@@ -1360,7 +848,7 @@ class SROA : public FunctionPass {
const bool RequiresDomTree;
LLVMContext *C;
- const DataLayout *TD;
+ const DataLayout *DL;
DominatorTree *DT;
/// \brief Worklist of alloca instructions to simplify.
@@ -1390,10 +878,25 @@ class SROA : public FunctionPass {
/// \brief A collection of alloca instructions we can directly promote.
std::vector<AllocaInst *> PromotableAllocas;
+ /// \brief A worklist of PHIs to speculate prior to promoting allocas.
+ ///
+ /// All of these PHIs have been checked for the safety of speculation;
+ /// speculating them will allow the allocas currently in the promotable
+ /// queue to be promoted.
+ SetVector<PHINode *, SmallVector<PHINode *, 2> > SpeculatablePHIs;
+
+ /// \brief A worklist of select instructions to speculate prior to promoting
+ /// allocas.
+ ///
+ /// All of these select instructions have been checked for the safety of
+ /// speculation; speculating them will allow the allocas currently in the
+ /// promotable queue to be promoted.
+ SetVector<SelectInst *, SmallVector<SelectInst *, 2> > SpeculatableSelects;
+
public:
SROA(bool RequiresDomTree = true)
: FunctionPass(ID), RequiresDomTree(RequiresDomTree),
- C(0), TD(0), DT(0) {
+ C(0), DL(0), DT(0) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F);
@@ -1404,13 +907,13 @@ public:
private:
friend class PHIOrSelectSpeculator;
- friend class AllocaPartitionRewriter;
- friend class AllocaPartitionVectorRewriter;
+ friend class AllocaSliceRewriter;
- bool rewriteAllocaPartition(AllocaInst &AI,
- AllocaPartitioning &P,
- AllocaPartitioning::iterator PI);
- bool splitAlloca(AllocaInst &AI, AllocaPartitioning &P);
+ bool rewritePartition(AllocaInst &AI, AllocaSlices &S,
+ AllocaSlices::iterator B, AllocaSlices::iterator E,
+ int64_t BeginOffset, int64_t EndOffset,
+ ArrayRef<AllocaSlices::iterator> SplitUses);
+ bool splitAlloca(AllocaInst &AI, AllocaSlices &S);
bool runOnAlloca(AllocaInst &AI);
void deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas);
bool promoteAllocas(Function &F);
@@ -1429,286 +932,255 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
false, false)
-namespace {
-/// \brief Visitor to speculate PHIs and Selects where possible.
-class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {
- // Befriend the base class so it can delegate to private visit methods.
- friend class llvm::InstVisitor<PHIOrSelectSpeculator>;
-
- const DataLayout &TD;
- AllocaPartitioning &P;
- SROA &Pass;
+/// Walk the range of a partitioning looking for a common type to cover this
+/// sequence of slices.
+static Type *findCommonType(AllocaSlices::const_iterator B,
+ AllocaSlices::const_iterator E,
+ uint64_t EndOffset) {
+ Type *Ty = 0;
+ bool IgnoreNonIntegralTypes = false;
+ for (AllocaSlices::const_iterator I = B; I != E; ++I) {
+ Use *U = I->getUse();
+ if (isa<IntrinsicInst>(*U->getUser()))
+ continue;
+ if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
+ continue;
-public:
- PHIOrSelectSpeculator(const DataLayout &TD, AllocaPartitioning &P, SROA &Pass)
- : TD(TD), P(P), Pass(Pass) {}
-
- /// \brief Visit the users of an alloca partition and rewrite them.
- void visitUsers(AllocaPartitioning::const_iterator PI) {
- // Note that we need to use an index here as the underlying vector of uses
- // may be grown during speculation. However, we never need to re-visit the
- // new uses, and so we can use the initial size bound.
- for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
- const PartitionUse &PU = P.getUse(PI, Idx);
- if (!PU.getUse())
- continue; // Skip dead use.
-
- visit(cast<Instruction>(PU.getUse()->getUser()));
+ Type *UserTy = 0;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
+ UserTy = LI->getType();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
+ UserTy = SI->getValueOperand()->getType();
+ } else {
+ IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
+ continue;
}
- }
-private:
- // By default, skip this instruction.
- void visitInstruction(Instruction &I) {}
-
- /// PHI instructions that use an alloca and are subsequently loaded can be
- /// rewritten to load both input pointers in the pred blocks and then PHI the
- /// results, allowing the load of the alloca to be promoted.
- /// From this:
- /// %P2 = phi [i32* %Alloca, i32* %Other]
- /// %V = load i32* %P2
- /// to:
- /// %V1 = load i32* %Alloca -> will be mem2reg'd
- /// ...
- /// %V2 = load i32* %Other
- /// ...
- /// %V = phi [i32 %V1, i32 %V2]
- ///
- /// We can do this to a select if its only uses are loads and if the operands
- /// to the select can be loaded unconditionally.
- ///
- /// FIXME: This should be hoisted into a generic utility, likely in
- /// Transforms/Util/Local.h
- bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *> &Loads) {
- // For now, we can only do this promotion if the load is in the same block
- // as the PHI, and if there are no stores between the phi and load.
- // TODO: Allow recursive phi users.
- // TODO: Allow stores.
- BasicBlock *BB = PN.getParent();
- unsigned MaxAlign = 0;
- for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end();
- UI != UE; ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || !LI->isSimple()) return false;
-
- // For now we only allow loads in the same block as the PHI. This is
- // a common case that happens when instcombine merges two loads through
- // a PHI.
- if (LI->getParent() != BB) return false;
-
- // Ensure that there are no instructions between the PHI and the load that
- // could store.
- for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
- if (BBI->mayWriteToMemory())
- return false;
-
- MaxAlign = std::max(MaxAlign, LI->getAlignment());
- Loads.push_back(LI);
+ if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
+ // If the type is larger than the partition, skip it. We only encounter
+ // this for split integer operations where we want to use the type of the
+ // entity causing the split. Also skip if the type is not a byte width
+ // multiple.
+ if (ITy->getBitWidth() % 8 != 0 ||
+ ITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
+ continue;
+
+ // If we have found an integer type use covering the alloca, use that
+ // regardless of the other types, as integers are often used for
+ // a "bucket of bits" type.
+ //
+ // NB: This *must* be the only return from inside the loop so that the
+ // order of slices doesn't impact the computed type.
+ return ITy;
+ } else if (IgnoreNonIntegralTypes) {
+ continue;
}
- // We can only transform this if it is safe to push the loads into the
- // predecessor blocks. The only thing to watch out for is that we can't put
- // a possibly trapping load in the predecessor if it is a critical edge.
- for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
- TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
- Value *InVal = PN.getIncomingValue(Idx);
-
- // If the value is produced by the terminator of the predecessor (an
- // invoke) or it has side-effects, there is no valid place to put a load
- // in the predecessor.
- if (TI == InVal || TI->mayHaveSideEffects())
- return false;
+ if (Ty && Ty != UserTy)
+ IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
- // If the predecessor has a single successor, then the edge isn't
- // critical.
- if (TI->getNumSuccessors() == 1)
- continue;
+ Ty = UserTy;
+ }
+ return Ty;
+}
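+// A hedged example of the rule above (illustrative IR): for an 8-byte alloca
+// whose slices include
+//   %f = load float* %p0   ; covers [0,4) only, skipped by the offset check
+//   %i = load i64* %p1     ; covers the full [0,8) range
+// the covering i64 use is returned immediately as a "bucket of bits" type,
+// so the computed type does not depend on the order of the slices.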
- // If this pointer is always safe to load, or if we can prove that there
- // is already a load in the block, then we can move the load to the pred
- // block.
- if (InVal->isDereferenceablePointer() ||
- isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD))
- continue;
+/// PHI instructions that use an alloca and are subsequently loaded can be
+/// rewritten to load both input pointers in the pred blocks and then PHI the
+/// results, allowing the load of the alloca to be promoted.
+/// From this:
+/// %P2 = phi [i32* %Alloca, i32* %Other]
+/// %V = load i32* %P2
+/// to:
+/// %V1 = load i32* %Alloca -> will be mem2reg'd
+/// ...
+/// %V2 = load i32* %Other
+/// ...
+/// %V = phi [i32 %V1, i32 %V2]
+///
+/// We can do this to a select if its only uses are loads and if the operands
+/// to the select can be loaded unconditionally.
+///
+/// FIXME: This should be hoisted into a generic utility, likely in
+/// Transforms/Util/Local.h
+static bool isSafePHIToSpeculate(PHINode &PN,
+ const DataLayout *DL = 0) {
+ // For now, we can only do this promotion if the load is in the same block
+ // as the PHI, and if there are no stores between the phi and load.
+ // TODO: Allow recursive phi users.
+ // TODO: Allow stores.
+ BasicBlock *BB = PN.getParent();
+ unsigned MaxAlign = 0;
+ bool HaveLoad = false;
+ for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end(); UI != UE;
+ ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || !LI->isSimple())
+ return false;
+ // For now we only allow loads in the same block as the PHI. This is
+ // a common case that happens when instcombine merges two loads through
+ // a PHI.
+ if (LI->getParent() != BB)
return false;
- }
- return true;
+ // Ensure that there are no instructions between the PHI and the load that
+ // could store.
+ for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ MaxAlign = std::max(MaxAlign, LI->getAlignment());
+ HaveLoad = true;
}
- void visitPHINode(PHINode &PN) {
- DEBUG(dbgs() << " original: " << PN << "\n");
+ if (!HaveLoad)
+ return false;
- SmallVector<LoadInst *, 4> Loads;
- if (!isSafePHIToSpeculate(PN, Loads))
- return;
+ // We can only transform this if it is safe to push the loads into the
+ // predecessor blocks. The only thing to watch out for is that we can't put
+ // a possibly trapping load in the predecessor if it is a critical edge.
+ for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
+ TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
+ Value *InVal = PN.getIncomingValue(Idx);
+
+ // If the value is produced by the terminator of the predecessor (an
+ // invoke) or it has side-effects, there is no valid place to put a load
+ // in the predecessor.
+ if (TI == InVal || TI->mayHaveSideEffects())
+ return false;
- assert(!Loads.empty());
+ // If the predecessor has a single successor, then the edge isn't
+ // critical.
+ if (TI->getNumSuccessors() == 1)
+ continue;
- Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
- IRBuilderTy PHIBuilder(&PN);
- PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
- PN.getName() + ".sroa.speculated");
+ // If this pointer is always safe to load, or if we can prove that there
+ // is already a load in the block, then we can move the load to the pred
+ // block.
+ if (InVal->isDereferenceablePointer() ||
+ isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL))
+ continue;
- // Get the TBAA tag and alignment to use from one of the loads. It doesn't
- // matter which one we get and if any differ.
- LoadInst *SomeLoad = cast<LoadInst>(Loads.back());
- MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
- unsigned Align = SomeLoad->getAlignment();
+ return false;
+ }
- // Rewrite all loads of the PN to use the new PHI.
- do {
- LoadInst *LI = Loads.pop_back_val();
- LI->replaceAllUsesWith(NewPN);
- Pass.DeadInsts.insert(LI);
- } while (!Loads.empty());
-
- // Inject loads into all of the pred blocks.
- for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
- BasicBlock *Pred = PN.getIncomingBlock(Idx);
- TerminatorInst *TI = Pred->getTerminator();
- Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
- Value *InVal = PN.getIncomingValue(Idx);
- IRBuilderTy PredBuilder(TI);
-
- LoadInst *Load
- = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +
- Pred->getName()));
- ++NumLoadsSpeculated;
- Load->setAlignment(Align);
- if (TBAATag)
- Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
- NewPN->addIncoming(Load, Pred);
-
- Instruction *Ptr = dyn_cast<Instruction>(InVal);
- if (!Ptr)
- // No uses to rewrite.
- continue;
+ return true;
+}
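+// A hedged sketch of the critical-edge restriction above (illustrative IR):
+//   pred:                                   ; two successors
+//     br i1 %cc, label %bb, label %other
+//   bb:
+//     %p = phi i32* [ %unknown, %pred ], ...
+//     %v = load i32* %p
+// The load cannot be hoisted into %pred unless %unknown is known to be
+// dereferenceable: %pred -> %bb is a critical edge, so the hoisted load
+// would also execute on the path to %other and could trap there.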
- // Try to lookup and rewrite any partition uses corresponding to this phi
- // input.
- AllocaPartitioning::iterator PI
- = P.findPartitionForPHIOrSelectOperand(InUse);
- if (PI == P.end())
- continue;
+static void speculatePHINodeLoads(PHINode &PN) {
+ DEBUG(dbgs() << " original: " << PN << "\n");
+
+ Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
+ IRBuilderTy PHIBuilder(&PN);
+ PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
+ PN.getName() + ".sroa.speculated");
+
+ // Get the TBAA tag and alignment to use from one of the loads. It doesn't
+ // matter which one we get and if any differ.
+ LoadInst *SomeLoad = cast<LoadInst>(*PN.use_begin());
+ MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+ unsigned Align = SomeLoad->getAlignment();
+
+ // Rewrite all loads of the PN to use the new PHI.
+ while (!PN.use_empty()) {
+ LoadInst *LI = cast<LoadInst>(*PN.use_begin());
+ LI->replaceAllUsesWith(NewPN);
+ LI->eraseFromParent();
+ }
+
+ // Inject loads into all of the pred blocks.
+ for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
+ BasicBlock *Pred = PN.getIncomingBlock(Idx);
+ TerminatorInst *TI = Pred->getTerminator();
+ Value *InVal = PN.getIncomingValue(Idx);
+ IRBuilderTy PredBuilder(TI);
+
+ LoadInst *Load = PredBuilder.CreateLoad(
+ InVal, (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
+ ++NumLoadsSpeculated;
+ Load->setAlignment(Align);
+ if (TBAATag)
+ Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ NewPN->addIncoming(Load, Pred);
+ }
+
+ DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
+ PN.eraseFromParent();
+}
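+// A hedged before/after sketch of this rewrite (illustrative IR), assuming
+// isSafePHIToSpeculate has verified every use is a simple load in the block:
+//   %p = phi i32* [ %a, %pred1 ], [ %b, %pred2 ]
+//   %v = load i32* %p, align 4
+// becomes
+//   pred1: %v1 = load i32* %a, align 4   ; align/TBAA copied from one load
+//   pred2: %v2 = load i32* %b, align 4
+//   bb:    %v  = phi i32 [ %v1, %pred1 ], [ %v2, %pred2 ]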
- // Replace the Use in the PartitionUse for this operand with the Use
- // inside the load.
- AllocaPartitioning::use_iterator UI
- = P.findPartitionUseForPHIOrSelectOperand(InUse);
- assert(isa<PHINode>(*UI->getUse()->getUser()));
- UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex()));
- }
- DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
- }
-
- /// Select instructions that use an alloca and are subsequently loaded can be
- /// rewritten to load both input pointers and then select between the result,
- /// allowing the load of the alloca to be promoted.
- /// From this:
- /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
- /// %V = load i32* %P2
- /// to:
- /// %V1 = load i32* %Alloca -> will be mem2reg'd
- /// %V2 = load i32* %Other
- /// %V = select i1 %cond, i32 %V1, i32 %V2
- ///
- /// We can do this to a select if its only uses are loads and if the operand
- /// to the select can be loaded unconditionally.
- bool isSafeSelectToSpeculate(SelectInst &SI,
- SmallVectorImpl<LoadInst *> &Loads) {
- Value *TValue = SI.getTrueValue();
- Value *FValue = SI.getFalseValue();
- bool TDerefable = TValue->isDereferenceablePointer();
- bool FDerefable = FValue->isDereferenceablePointer();
-
- for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end();
- UI != UE; ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || !LI->isSimple()) return false;
-
- // Both operands to the select need to be dereferencable, either
- // absolutely (e.g. allocas) or at this point because we can see other
- // accesses to it.
- if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI,
- LI->getAlignment(), &TD))
- return false;
- if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI,
- LI->getAlignment(), &TD))
- return false;
- Loads.push_back(LI);
- }
+/// Select instructions that use an alloca and are subsequently loaded can be
+/// rewritten to load both input pointers and then select between the result,
+/// allowing the load of the alloca to be promoted.
+/// From this:
+/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
+/// %V = load i32* %P2
+/// to:
+/// %V1 = load i32* %Alloca -> will be mem2reg'd
+/// %V2 = load i32* %Other
+/// %V = select i1 %cond, i32 %V1, i32 %V2
+///
+/// We can do this to a select if its only uses are loads and if the operand
+/// to the select can be loaded unconditionally.
+static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) {
+ Value *TValue = SI.getTrueValue();
+ Value *FValue = SI.getFalseValue();
+ bool TDerefable = TValue->isDereferenceablePointer();
+ bool FDerefable = FValue->isDereferenceablePointer();
+
+ for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end(); UI != UE;
+ ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || !LI->isSimple())
+ return false;
- return true;
+ // Both operands to the select need to be dereferencable, either
+ // absolutely (e.g. allocas) or at this point because we can see other
+ // accesses to it.
+ if (!TDerefable &&
+ !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment(), DL))
+ return false;
+ if (!FDerefable &&
+ !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment(), DL))
+ return false;
}
- void visitSelectInst(SelectInst &SI) {
- DEBUG(dbgs() << " original: " << SI << "\n");
-
- // If the select isn't safe to speculate, just use simple logic to emit it.
- SmallVector<LoadInst *, 4> Loads;
- if (!isSafeSelectToSpeculate(SI, Loads))
- return;
+ return true;
+}
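+// A hedged example (illustrative IR): in
+//   %p = select i1 %cc, i32* %alloca, i32* %unknown
+//   %v = load i32* %p
+// speculation is safe only if %unknown is also dereferenceable, either
+// absolutely or because a visible access proves it; otherwise loading both
+// arms unconditionally could introduce a trap that the original code lacked.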
- IRBuilderTy IRB(&SI);
- Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
- AllocaPartitioning::iterator PIs[2];
- PartitionUse PUs[2];
- for (unsigned i = 0, e = 2; i != e; ++i) {
- PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
- if (PIs[i] != P.end()) {
- // If the pointer is within the partitioning, remove the select from
- // its uses. We'll add in the new loads below.
- AllocaPartitioning::use_iterator UI
- = P.findPartitionUseForPHIOrSelectOperand(Ops[i]);
- PUs[i] = *UI;
- // Clear out the use here so that the offsets into the use list remain
- // stable but this use is ignored when rewriting.
- UI->setUse(0);
- }
- }
+static void speculateSelectInstLoads(SelectInst &SI) {
+ DEBUG(dbgs() << " original: " << SI << "\n");
- Value *TV = SI.getTrueValue();
- Value *FV = SI.getFalseValue();
- // Replace the loads of the select with a select of two loads.
- while (!Loads.empty()) {
- LoadInst *LI = Loads.pop_back_val();
+ IRBuilderTy IRB(&SI);
+ Value *TV = SI.getTrueValue();
+ Value *FV = SI.getFalseValue();
+ // Replace the loads of the select with a select of two loads.
+ while (!SI.use_empty()) {
+ LoadInst *LI = cast<LoadInst>(*SI.use_begin());
+ assert(LI->isSimple() && "We only speculate simple loads");
- IRB.SetInsertPoint(LI);
- LoadInst *TL =
+ IRB.SetInsertPoint(LI);
+ LoadInst *TL =
IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
- LoadInst *FL =
+ LoadInst *FL =
IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");
- NumLoadsSpeculated += 2;
-
- // Transfer alignment and TBAA info if present.
- TL->setAlignment(LI->getAlignment());
- FL->setAlignment(LI->getAlignment());
- if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
- TL->setMetadata(LLVMContext::MD_tbaa, Tag);
- FL->setMetadata(LLVMContext::MD_tbaa, Tag);
- }
+ NumLoadsSpeculated += 2;
- Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
- LI->getName() + ".sroa.speculated");
+ // Transfer alignment and TBAA info if present.
+ TL->setAlignment(LI->getAlignment());
+ FL->setAlignment(LI->getAlignment());
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
+ TL->setMetadata(LLVMContext::MD_tbaa, Tag);
+ FL->setMetadata(LLVMContext::MD_tbaa, Tag);
+ }
- LoadInst *Loads[2] = { TL, FL };
- for (unsigned i = 0, e = 2; i != e; ++i) {
- if (PIs[i] != P.end()) {
- Use *LoadUse = &Loads[i]->getOperandUse(0);
- assert(PUs[i].getUse()->get() == LoadUse->get());
- PUs[i].setUse(LoadUse);
- P.use_push_back(PIs[i], PUs[i]);
- }
- }
+ Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
+ LI->getName() + ".sroa.speculated");
- DEBUG(dbgs() << " speculated to: " << *V << "\n");
- LI->replaceAllUsesWith(V);
- Pass.DeadInsts.insert(LI);
- }
+ DEBUG(dbgs() << " speculated to: " << *V << "\n");
+ LI->replaceAllUsesWith(V);
+ LI->eraseFromParent();
}
-};
+ SI.eraseFromParent();
}
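+// A hedged note on the cost shape (an assumption about downstream passes):
+// each load of the select becomes its own pair of speculated loads plus a
+// select, so two loads of one select yield four loads and two selects; the
+// duplicated loads of the two arms are expected to be cleaned up by CSE.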
/// \brief Build a GEP out of a base pointer and indices.
@@ -1737,7 +1209,7 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
/// TargetTy. If we can't find one with the same type, we at least try to use
/// one with the same size. If none of that works, we just produce the GEP as
/// indicated by Indices to have the correct offset.
-static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
Value *BasePtr, Type *Ty, Type *TargetTy,
SmallVectorImpl<Value *> &Indices) {
if (Ty == TargetTy)
@@ -1754,7 +1226,7 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD,
ElementTy = SeqTy->getElementType();
// Note that we use the default address space as this index is over an
// array or a vector, not a pointer.
- Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(0), 0)));
+ Indices.push_back(IRB.getInt(APInt(DL.getPointerSizeInBits(0), 0)));
} else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
if (STy->element_begin() == STy->element_end())
break; // Nothing left to descend into.
@@ -1775,12 +1247,12 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD,
///
/// This is the recursive step for getNaturalGEPWithOffset that walks down the
/// element types adding appropriate indices for the GEP.
-static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD,
+static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, Type *Ty, APInt &Offset,
Type *TargetTy,
SmallVectorImpl<Value *> &Indices) {
if (Offset == 0)
- return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices);
+ return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices);
// We can't recurse through pointer types.
if (Ty->isPointerTy())
@@ -1790,7 +1262,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD,
// extremely poorly defined currently. The long-term goal is to remove GEPing
// over a vector from the IR completely.
if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
- unsigned ElementSizeInBits = TD.getTypeSizeInBits(VecTy->getScalarType());
+ unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType());
if (ElementSizeInBits % 8)
return 0; // GEPs over non-multiple of 8 size vector elements are invalid.
APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
@@ -1799,20 +1271,20 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD,
return 0;
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
- return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),
+ return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
Offset, TargetTy, Indices);
}
if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
Type *ElementTy = ArrTy->getElementType();
- APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));
+ APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(ArrTy->getNumElements()))
return 0;
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
- return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
+ return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
Indices);
}
@@ -1820,18 +1292,18 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD,
if (!STy)
return 0;
- const StructLayout *SL = TD.getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
uint64_t StructOffset = Offset.getZExtValue();
if (StructOffset >= SL->getSizeInBytes())
return 0;
unsigned Index = SL->getElementContainingOffset(StructOffset);
Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
Type *ElementTy = STy->getElementType(Index);
- if (Offset.uge(TD.getTypeAllocSize(ElementTy)))
+ if (Offset.uge(DL.getTypeAllocSize(ElementTy)))
return 0; // The offset points into alignment padding.
Indices.push_back(IRB.getInt32(Index));
- return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
+ return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
Indices);
}
@@ -1845,7 +1317,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD,
/// Indices, and setting Ty to the result subtype.
///
/// If no natural GEP can be constructed, this function returns null.
-static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, APInt Offset, Type *TargetTy,
SmallVectorImpl<Value *> &Indices) {
PointerType *Ty = cast<PointerType>(Ptr->getType());
@@ -1858,14 +1330,14 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD,
Type *ElementTy = Ty->getElementType();
if (!ElementTy->isSized())
return 0; // We can't GEP through an unsized element.
- APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));
+ APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
if (ElementSize == 0)
return 0; // Zero-length arrays can't help us build a natural GEP.
APInt NumSkippedElements = Offset.sdiv(ElementSize);
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
- return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
+ return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
Indices);
}
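+// A hedged worked example (hypothetical types): for
+//   Ptr : [4 x { i32, i32 }]*, Offset = 12, TargetTy = i32
+// the outer element size is 32 bytes, so the first index is 12 / 32 = 0;
+// recursion then skips 12 / 8 = 1 array element (Offset becomes 4) and picks
+// struct field 1, producing
+//   getelementptr [4 x { i32, i32 }]* %ptr, i64 0, i64 1, i32 1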
@@ -1884,7 +1356,7 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD,
/// properties. The algorithm tries to fold as many constant indices into
/// a single GEP as possible, thus making each GEP more independent of the
/// surrounding code.
-static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD,
+static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, APInt Offset, Type *PointerTy) {
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
@@ -1908,7 +1380,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD,
// First fold any existing GEPs into the offset.
while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
APInt GEPOffset(Offset.getBitWidth(), 0);
- if (!GEP->accumulateConstantOffset(TD, GEPOffset))
+ if (!GEP->accumulateConstantOffset(DL, GEPOffset))
break;
Offset += GEPOffset;
Ptr = GEP->getPointerOperand();
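+    // A hedged worked example of this folding (illustrative IR): with
+    //   %g1 = getelementptr { i32, i32 }* %p, i64 0, i32 1   ; +4 bytes
+    //   %g2 = getelementptr i32* %g1, i64 2                  ; +8 bytes
+    // starting from %g2 with Offset 0, two iterations accumulate Offset = 12
+    // and rewind Ptr to %p before a natural GEP is attempted.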
@@ -1918,7 +1390,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD,
// See if we can perform a natural GEP here.
Indices.clear();
- if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,
+ if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy,
Indices)) {
if (P->getType() == PointerTy) {
// Zap any offset pointer that we ended up computing in previous rounds.
@@ -1989,6 +1461,10 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
return false;
+ // We can convert pointers to integers and vice-versa. Same for vectors
+ // of pointers and integers.
+ OldTy = OldTy->getScalarType();
+ NewTy = NewTy->getScalarType();
if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
if (NewTy->isPointerTy() && OldTy->isPointerTy())
return true;
@@ -2007,24 +1483,126 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
/// two types for viability with this routine.
static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
- Type *Ty) {
- assert(canConvertValue(DL, V->getType(), Ty) &&
- "Value not convertable to type");
- if (V->getType() == Ty)
+ Type *NewTy) {
+ Type *OldTy = V->getType();
+ assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
+
+ if (OldTy == NewTy)
return V;
- if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
- if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
if (NewITy->getBitWidth() > OldITy->getBitWidth())
return IRB.CreateZExt(V, NewITy);
- if (V->getType()->isIntegerTy() && Ty->isPointerTy())
- return IRB.CreateIntToPtr(V, Ty);
- if (V->getType()->isPointerTy() && Ty->isIntegerTy())
- return IRB.CreatePtrToInt(V, Ty);
- return IRB.CreateBitCast(V, Ty);
+ // See if we need inttoptr for this type pair. A cast involving both scalars
+ // and vectors requires an additional bitcast.
+ if (OldTy->getScalarType()->isIntegerTy() &&
+ NewTy->getScalarType()->isPointerTy()) {
+ // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
+ if (OldTy->isVectorTy() && !NewTy->isVectorTy())
+ return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
+ NewTy);
+
+ // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
+ if (!OldTy->isVectorTy() && NewTy->isVectorTy())
+ return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
+ NewTy);
+
+ return IRB.CreateIntToPtr(V, NewTy);
+ }
+
+ // See if we need ptrtoint for this type pair. A cast involving both scalars
+ // and vectors requires an additional bitcast.
+ if (OldTy->getScalarType()->isPointerTy() &&
+ NewTy->getScalarType()->isIntegerTy()) {
+ // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
+ if (OldTy->isVectorTy() && !NewTy->isVectorTy())
+ return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
+ NewTy);
+
+ // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
+ if (!OldTy->isVectorTy() && NewTy->isVectorTy())
+ return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
+ NewTy);
+
+ return IRB.CreatePtrToInt(V, NewTy);
+ }
+
+ return IRB.CreateBitCast(V, NewTy);
}
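+// A hedged sketch of the two-step expansions above (assuming a DataLayout
+// with 64-bit pointers):
+//   <2 x i32> -> i8*  : bitcast to i64, then inttoptr i64 to i8*
+//   i128 -> <2 x i8*> : bitcast to <2 x i64>, then inttoptr to <2 x i8*>
+//   <2 x i8*> -> i128 : ptrtoint to <2 x i64>, then bitcast to i128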
-/// \brief Test whether the given alloca partition can be promoted to a vector.
+/// \brief Test whether the given slice use can be promoted to a vector.
+///
+/// This function is called to test each entry in a partitioning which is slated
+/// for a single slice.
+static bool isVectorPromotionViableForSlice(
+ const DataLayout &DL, AllocaSlices &S, uint64_t SliceBeginOffset,
+ uint64_t SliceEndOffset, VectorType *Ty, uint64_t ElementSize,
+ AllocaSlices::const_iterator I) {
+ // First validate the slice offsets.
+ uint64_t BeginOffset =
+ std::max(I->beginOffset(), SliceBeginOffset) - SliceBeginOffset;
+ uint64_t BeginIndex = BeginOffset / ElementSize;
+ if (BeginIndex * ElementSize != BeginOffset ||
+ BeginIndex >= Ty->getNumElements())
+ return false;
+ uint64_t EndOffset =
+ std::min(I->endOffset(), SliceEndOffset) - SliceBeginOffset;
+ uint64_t EndIndex = EndOffset / ElementSize;
+ if (EndIndex * ElementSize != EndOffset || EndIndex > Ty->getNumElements())
+ return false;
+
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ uint64_t NumElements = EndIndex - BeginIndex;
+ Type *SliceTy =
+ (NumElements == 1) ? Ty->getElementType()
+ : VectorType::get(Ty->getElementType(), NumElements);
+
+ Type *SplitIntTy =
+ Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
+
+ Use *U = I->getUse();
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
+ if (MI->isVolatile())
+ return false;
+ if (!I->isSplittable())
+ return false; // Skip any unsplittable intrinsics.
+ } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
+ // Disable vector promotion when there are loads or stores of an FCA.
+ return false;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
+ if (LI->isVolatile())
+ return false;
+ Type *LTy = LI->getType();
+ if (SliceBeginOffset > I->beginOffset() ||
+ SliceEndOffset < I->endOffset()) {
+ assert(LTy->isIntegerTy());
+ LTy = SplitIntTy;
+ }
+ if (!canConvertValue(DL, SliceTy, LTy))
+ return false;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
+ if (SI->isVolatile())
+ return false;
+ Type *STy = SI->getValueOperand()->getType();
+ if (SliceBeginOffset > I->beginOffset() ||
+ SliceEndOffset < I->endOffset()) {
+ assert(STy->isIntegerTy());
+ STy = SplitIntTy;
+ }
+ if (!canConvertValue(DL, STy, SliceTy))
+ return false;
+ } else {
+ return false;
+ }
+
+ return true;
+}
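+// A hedged example of the index checks above (hypothetical <4 x i32> alloca,
+// ElementSize = 4): a slice at [4,12) yields BeginIndex 1 and EndIndex 3, so
+// SliceTy is <2 x i32> and an i64 or <2 x i32> access converts cleanly; a
+// slice at [2,6) fails because 2 is not a multiple of the element size.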
+
+/// \brief Test whether the given alloca partitioning and range of slices can be
+/// promoted to a vector.
///
/// This is a quick test to check whether we can rewrite a particular alloca
/// partition (and its newly formed alloca) into a vector alloca with only
@@ -2032,75 +1610,103 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
/// SSA value. We only can ensure this for a limited set of operations, and we
/// don't want to do the rewrites unless we are confident that the result will
/// be promotable, so we have an early test here.
-static bool isVectorPromotionViable(const DataLayout &TD,
- Type *AllocaTy,
- AllocaPartitioning &P,
- uint64_t PartitionBeginOffset,
- uint64_t PartitionEndOffset,
- AllocaPartitioning::const_use_iterator I,
- AllocaPartitioning::const_use_iterator E) {
+static bool
+isVectorPromotionViable(const DataLayout &DL, Type *AllocaTy, AllocaSlices &S,
+ uint64_t SliceBeginOffset, uint64_t SliceEndOffset,
+ AllocaSlices::const_iterator I,
+ AllocaSlices::const_iterator E,
+ ArrayRef<AllocaSlices::iterator> SplitUses) {
VectorType *Ty = dyn_cast<VectorType>(AllocaTy);
if (!Ty)
return false;
- uint64_t ElementSize = TD.getTypeSizeInBits(Ty->getScalarType());
+ uint64_t ElementSize = DL.getTypeSizeInBits(Ty->getScalarType());
// While the definition of LLVM vectors is bitpacked, we don't support sizes
// that aren't byte sized.
if (ElementSize % 8)
return false;
- assert((TD.getTypeSizeInBits(Ty) % 8) == 0 &&
+ assert((DL.getTypeSizeInBits(Ty) % 8) == 0 &&
"vector size not a multiple of element size?");
ElementSize /= 8;
- for (; I != E; ++I) {
- Use *U = I->getUse();
- if (!U)
- continue; // Skip dead use.
-
- uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
- uint64_t BeginIndex = BeginOffset / ElementSize;
- if (BeginIndex * ElementSize != BeginOffset ||
- BeginIndex >= Ty->getNumElements())
+ for (; I != E; ++I)
+ if (!isVectorPromotionViableForSlice(DL, S, SliceBeginOffset,
+ SliceEndOffset, Ty, ElementSize, I))
return false;
- uint64_t EndOffset = I->EndOffset - PartitionBeginOffset;
- uint64_t EndIndex = EndOffset / ElementSize;
- if (EndIndex * ElementSize != EndOffset ||
- EndIndex > Ty->getNumElements())
+
+ for (ArrayRef<AllocaSlices::iterator>::const_iterator SUI = SplitUses.begin(),
+ SUE = SplitUses.end();
+ SUI != SUE; ++SUI)
+ if (!isVectorPromotionViableForSlice(DL, S, SliceBeginOffset,
+ SliceEndOffset, Ty, ElementSize, *SUI))
return false;
- assert(EndIndex > BeginIndex && "Empty vector!");
- uint64_t NumElements = EndIndex - BeginIndex;
- Type *PartitionTy
- = (NumElements == 1) ? Ty->getElementType()
- : VectorType::get(Ty->getElementType(), NumElements);
+ return true;
+}
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
- if (MI->isVolatile())
- return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
- const AllocaPartitioning::MemTransferOffsets &MTO
- = P.getMemTransferOffsets(*MTI);
- if (!MTO.IsSplittable)
- return false;
- }
- } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
- // Disable vector promotion when there are loads or stores of an FCA.
+/// \brief Test whether a slice of an alloca is valid for integer widening.
+///
+/// This implements the necessary checking for the \c isIntegerWideningViable
+/// test below on a single slice of the alloca.
+static bool isIntegerWideningViableForSlice(const DataLayout &DL,
+ Type *AllocaTy,
+ uint64_t AllocBeginOffset,
+ uint64_t Size, AllocaSlices &S,
+ AllocaSlices::const_iterator I,
+ bool &WholeAllocaOp) {
+ uint64_t RelBegin = I->beginOffset() - AllocBeginOffset;
+ uint64_t RelEnd = I->endOffset() - AllocBeginOffset;
+
+ // We can't reasonably handle cases where the load or store extends past
+ // the end of the aloca's type and into its padding.
+ if (RelEnd > Size)
+ return false;
+
+ Use *U = I->getUse();
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
+ if (LI->isVolatile())
return false;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
- if (LI->isVolatile())
- return false;
- if (!canConvertValue(TD, PartitionTy, LI->getType()))
- return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
- if (SI->isVolatile())
+ if (RelBegin == 0 && RelEnd == Size)
+ WholeAllocaOp = true;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
+ if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy))
return false;
- if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy))
+ } else if (RelBegin != 0 || RelEnd != Size ||
+ !canConvertValue(DL, AllocaTy, LI->getType())) {
+ // Non-integer loads need to be convertible from the alloca type so that
+ // they are promotable.
+ return false;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
+ Type *ValueTy = SI->getValueOperand()->getType();
+ if (SI->isVolatile())
+ return false;
+ if (RelBegin == 0 && RelEnd == Size)
+ WholeAllocaOp = true;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
+ if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy))
return false;
- } else {
+ } else if (RelBegin != 0 || RelEnd != Size ||
+ !canConvertValue(DL, ValueTy, AllocaTy)) {
+ // Non-integer stores need to be convertible to the alloca type so that
+ // they are promotable.
return false;
}
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
+ if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
+ return false;
+ if (!I->isSplittable())
+ return false; // Skip any unsplittable intrinsics.
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ } else {
+ return false;
}
+
return true;
}
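+// A hedged example over an i64 alloca (Size = 8): a load i64 at [0,8) sets
+// WholeAllocaOp; a load i32 at [0,4) is an acceptable narrow integer use; a
+// load float at [4,8) is rejected, being non-integer without covering the
+// whole alloca.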
@@ -2110,97 +1716,50 @@ static bool isVectorPromotionViable(const DataLayout &TD,
/// This is a quick test to check whether we can rewrite the integer loads and
/// stores to a particular alloca into wider loads and stores and be able to
/// promote the resulting alloca.
-static bool isIntegerWideningViable(const DataLayout &TD,
- Type *AllocaTy,
- uint64_t AllocBeginOffset,
- AllocaPartitioning &P,
- AllocaPartitioning::const_use_iterator I,
- AllocaPartitioning::const_use_iterator E) {
- uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy);
+static bool
+isIntegerWideningViable(const DataLayout &DL, Type *AllocaTy,
+ uint64_t AllocBeginOffset, AllocaSlices &S,
+ AllocaSlices::const_iterator I,
+ AllocaSlices::const_iterator E,
+ ArrayRef<AllocaSlices::iterator> SplitUses) {
+ uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy);
// Don't create integer types larger than the maximum bitwidth.
if (SizeInBits > IntegerType::MAX_INT_BITS)
return false;
// Don't try to handle allocas with bit-padding.
- if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy))
+ if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy))
return false;
// We need to ensure that an integer type with the appropriate bitwidth can
// be converted to the alloca type, whatever that is. We don't want to force
// the alloca itself to have an integer type if there is a more suitable one.
Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
- if (!canConvertValue(TD, AllocaTy, IntTy) ||
- !canConvertValue(TD, IntTy, AllocaTy))
+ if (!canConvertValue(DL, AllocaTy, IntTy) ||
+ !canConvertValue(DL, IntTy, AllocaTy))
return false;
- uint64_t Size = TD.getTypeStoreSize(AllocaTy);
-
- // Check the uses to ensure the uses are (likely) promotable integer uses.
- // Also ensure that the alloca has a covering load or store. We don't want
- // to widen the integer operations only to fail to promote due to some other
- // unsplittable entry (which we may make splittable later).
- bool WholeAllocaOp = false;
- for (; I != E; ++I) {
- Use *U = I->getUse();
- if (!U)
- continue; // Skip dead use.
+ uint64_t Size = DL.getTypeStoreSize(AllocaTy);
- uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
- uint64_t RelEnd = I->EndOffset - AllocBeginOffset;
+ // While examining uses, we ensure that the alloca has a covering load or
+ // store. We don't want to widen the integer operations only to fail to
+ // promote due to some other unsplittable entry (which we may make splittable
+ // later). However, if there are only splittable uses, go ahead and assume
+ // that we cover the alloca.
+ bool WholeAllocaOp = (I != E) ? false : DL.isLegalInteger(SizeInBits);
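+  // A hedged illustration of the initialization above: with no slices in
+  // [I, E) (for example, only splittable uses carried in SplitUses), the
+  // alloca is assumed covered only when the target declares a legal integer
+  // of this width, e.g. i64 on a typical 64-bit DataLayout but not i24.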
- // We can't reasonably handle cases where the load or store extends past
- // the end of the aloca's type and into its padding.
- if (RelEnd > Size)
+ for (; I != E; ++I)
+ if (!isIntegerWideningViableForSlice(DL, AllocaTy, AllocBeginOffset, Size,
+ S, I, WholeAllocaOp))
return false;
- if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
- if (LI->isVolatile())
- return false;
- if (RelBegin == 0 && RelEnd == Size)
- WholeAllocaOp = true;
- if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
- if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy))
- return false;
- continue;
- }
- // Non-integer loads need to be convertible from the alloca type so that
- // they are promotable.
- if (RelBegin != 0 || RelEnd != Size ||
- !canConvertValue(TD, AllocaTy, LI->getType()))
- return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
- Type *ValueTy = SI->getValueOperand()->getType();
- if (SI->isVolatile())
- return false;
- if (RelBegin == 0 && RelEnd == Size)
- WholeAllocaOp = true;
- if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
- if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy))
- return false;
- continue;
- }
- // Non-integer stores need to be convertible to the alloca type so that
- // they are promotable.
- if (RelBegin != 0 || RelEnd != Size ||
- !canConvertValue(TD, ValueTy, AllocaTy))
- return false;
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
- if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
- return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
- const AllocaPartitioning::MemTransferOffsets &MTO
- = P.getMemTransferOffsets(*MTI);
- if (!MTO.IsSplittable)
- return false;
- }
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
- return false;
- } else {
+ for (ArrayRef<AllocaSlices::iterator>::const_iterator SUI = SplitUses.begin(),
+ SUE = SplitUses.end();
+ SUI != SUE; ++SUI)
+ if (!isIntegerWideningViableForSlice(DL, AllocaTy, AllocBeginOffset, Size,
+ S, *SUI, WholeAllocaOp))
return false;
- }
- }
+
return WholeAllocaOp;
}
@@ -2335,19 +1894,19 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
}
namespace {
-/// \brief Visitor to rewrite instructions using a partition of an alloca to
-/// use a new alloca.
+/// \brief Visitor to rewrite instructions using a particular slice of an alloca
+/// to use a new alloca.
///
/// Also implements the rewriting to vector-based accesses when the partition
/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
/// lives here.
-class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
- bool> {
+class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
- friend class llvm::InstVisitor<AllocaPartitionRewriter, bool>;
+ friend class llvm::InstVisitor<AllocaSliceRewriter, bool>;
+ typedef llvm::InstVisitor<AllocaSliceRewriter, bool> Base;
- const DataLayout &TD;
- AllocaPartitioning &P;
+ const DataLayout &DL;
+ AllocaSlices &S;
SROA &Pass;
AllocaInst &OldAI, &NewAI;
const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
@@ -2372,106 +1931,112 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
// integer type will be stored here for easy access during rewriting.
IntegerType *IntTy;
- // The offset of the partition user currently being rewritten.
+ // The offset of the slice currently being rewritten.
uint64_t BeginOffset, EndOffset;
+ bool IsSplittable;
bool IsSplit;
Use *OldUse;
Instruction *OldPtr;
+ // Output members carrying state about the result of visiting and rewriting
+ // the slice of the alloca.
+ bool IsUsedByRewrittenSpeculatableInstructions;
+
// Utility IR builder, whose name prefix is setup for each visited use, and
// the insertion point is set to point to the user.
IRBuilderTy IRB;
public:
- AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P,
- AllocaPartitioning::iterator PI,
- SROA &Pass, AllocaInst &OldAI, AllocaInst &NewAI,
- uint64_t NewBeginOffset, uint64_t NewEndOffset)
- : TD(TD), P(P), Pass(Pass),
- OldAI(OldAI), NewAI(NewAI),
- NewAllocaBeginOffset(NewBeginOffset),
- NewAllocaEndOffset(NewEndOffset),
- NewAllocaTy(NewAI.getAllocatedType()),
- VecTy(), ElementTy(), ElementSize(), IntTy(),
- BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(),
- IRB(NewAI.getContext(), ConstantFolder()) {
- }
-
- /// \brief Visit the users of the alloca partition and rewrite them.
- bool visitUsers(AllocaPartitioning::const_use_iterator I,
- AllocaPartitioning::const_use_iterator E) {
- if (isVectorPromotionViable(TD, NewAI.getAllocatedType(), P,
- NewAllocaBeginOffset, NewAllocaEndOffset,
- I, E)) {
- ++NumVectorized;
- VecTy = cast<VectorType>(NewAI.getAllocatedType());
- ElementTy = VecTy->getElementType();
- assert((TD.getTypeSizeInBits(VecTy->getScalarType()) % 8) == 0 &&
+ AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &S, SROA &Pass,
+ AllocaInst &OldAI, AllocaInst &NewAI,
+ uint64_t NewBeginOffset, uint64_t NewEndOffset,
+ bool IsVectorPromotable = false,
+ bool IsIntegerPromotable = false)
+ : DL(DL), S(S), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
+ NewAllocaBeginOffset(NewBeginOffset), NewAllocaEndOffset(NewEndOffset),
+ NewAllocaTy(NewAI.getAllocatedType()),
+ VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : 0),
+ ElementTy(VecTy ? VecTy->getElementType() : 0),
+ ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
+ IntTy(IsIntegerPromotable
+ ? Type::getIntNTy(
+ NewAI.getContext(),
+ DL.getTypeSizeInBits(NewAI.getAllocatedType()))
+ : 0),
+ BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
+ OldPtr(), IsUsedByRewrittenSpeculatableInstructions(false),
+ IRB(NewAI.getContext(), ConstantFolder()) {
+ if (VecTy) {
+ assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
"Only multiple-of-8 sized vector elements are viable");
- ElementSize = TD.getTypeSizeInBits(VecTy->getScalarType()) / 8;
- } else if (isIntegerWideningViable(TD, NewAI.getAllocatedType(),
- NewAllocaBeginOffset, P, I, E)) {
- IntTy = Type::getIntNTy(NewAI.getContext(),
- TD.getTypeSizeInBits(NewAI.getAllocatedType()));
+ ++NumVectorized;
}
+ assert((!IsVectorPromotable && !IsIntegerPromotable) ||
+ IsVectorPromotable != IsIntegerPromotable);
+ }
+
+ bool visit(AllocaSlices::const_iterator I) {
bool CanSROA = true;
- for (; I != E; ++I) {
- if (!I->getUse())
- continue; // Skip dead uses.
- BeginOffset = I->BeginOffset;
- EndOffset = I->EndOffset;
- IsSplit = I->isSplit();
- OldUse = I->getUse();
- OldPtr = cast<Instruction>(OldUse->get());
-
- Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
- IRB.SetInsertPoint(OldUserI);
- IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
- IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
- ".");
-
- CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
- }
- if (VecTy) {
+ BeginOffset = I->beginOffset();
+ EndOffset = I->endOffset();
+ IsSplittable = I->isSplittable();
+ IsSplit =
+ BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
+
+ OldUse = I->getUse();
+ OldPtr = cast<Instruction>(OldUse->get());
+
+ Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
+ IRB.SetInsertPoint(OldUserI);
+ IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
+ IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + ".");
+
+ CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
+ if (VecTy || IntTy)
assert(CanSROA);
- VecTy = 0;
- ElementTy = 0;
- ElementSize = 0;
- }
- if (IntTy) {
- assert(CanSROA);
- IntTy = 0;
- }
return CanSROA;
}
+ /// \brief Query whether this slice is used by speculatable instructions after
+ /// rewriting.
+ ///
+ /// These instructions (PHIs and Selects currently) require the alloca slice
+ /// to run back through the rewriter. Thus, they are promotable, but not on
+ /// this iteration. This is distinct from a slice which is unpromotable for
+ /// some other reason, in which case we don't even want to perform the
+ /// speculation. This can be queried at any time and reflects whether (at
+ /// that point) a visit call has rewritten a speculatable instruction on the
+ /// current slice.
+ bool isUsedByRewrittenSpeculatableInstructions() const {
+ return IsUsedByRewrittenSpeculatableInstructions;
+ }
+
private:
+ // Make sure the other visit overloads are visible.
+ using Base::visit;
+
// Every instruction which can end up as a user must have a rewrite rule.
bool visitInstruction(Instruction &I) {
DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
llvm_unreachable("No rewrite rule for this instruction!");
}
- Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) {
- assert(BeginOffset >= NewAllocaBeginOffset);
- APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
- return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy);
+ Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, uint64_t Offset,
+ Type *PointerTy) {
+ assert(Offset >= NewAllocaBeginOffset);
+ return getAdjustedPtr(IRB, DL, &NewAI, APInt(DL.getPointerSizeInBits(),
+ Offset - NewAllocaBeginOffset),
+ PointerTy);
}
/// \brief Compute suitable alignment to access an offset into the new alloca.
unsigned getOffsetAlign(uint64_t Offset) {
unsigned NewAIAlign = NewAI.getAlignment();
if (!NewAIAlign)
- NewAIAlign = TD.getABITypeAlignment(NewAI.getAllocatedType());
+ NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType());
return MinAlign(NewAIAlign, Offset);
}
- /// \brief Compute suitable alignment to access this partition of the new
- /// alloca.
- unsigned getPartitionAlign() {
- return getOffsetAlign(BeginOffset - NewAllocaBeginOffset);
- }
-
/// \brief Compute suitable alignment to access a type at an offset of the
/// new alloca.
///
@@ -2479,15 +2044,7 @@ private:
/// otherwise returns the maximal suitable alignment.
unsigned getOffsetTypeAlign(Type *Ty, uint64_t Offset) {
unsigned Align = getOffsetAlign(Offset);
- return Align == TD.getABITypeAlignment(Ty) ? 0 : Align;
- }
-
- /// \brief Compute suitable alignment to access a type at the beginning of
- /// this partition of the new alloca.
- ///
- /// See \c getOffsetTypeAlign for details; this routine delegates to it.
- unsigned getPartitionTypeAlign(Type *Ty) {
- return getOffsetTypeAlign(Ty, BeginOffset - NewAllocaBeginOffset);
+ return Align == DL.getABITypeAlignment(Ty) ? 0 : Align;
}
unsigned getIndex(uint64_t Offset) {
@@ -2505,9 +2062,10 @@ private:
Pass.DeadInsts.insert(I);
}
- Value *rewriteVectorizedLoadInst() {
- unsigned BeginIndex = getIndex(BeginOffset);
- unsigned EndIndex = getIndex(EndOffset);
+ Value *rewriteVectorizedLoadInst(uint64_t NewBeginOffset,
+ uint64_t NewEndOffset) {
+ unsigned BeginIndex = getIndex(NewBeginOffset);
+ unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
@@ -2515,16 +2073,17 @@ private:
return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
}
- Value *rewriteIntegerLoad(LoadInst &LI) {
+ Value *rewriteIntegerLoad(LoadInst &LI, uint64_t NewBeginOffset,
+ uint64_t NewEndOffset) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"load");
- V = convertValue(TD, IRB, V, IntTy);
- assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
- uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- if (Offset > 0 || EndOffset < NewAllocaEndOffset)
- V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
+ V = convertValue(DL, IRB, V, IntTy);
+ assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
+ if (Offset > 0 || NewEndOffset < NewAllocaEndOffset)
+ V = extractInteger(DL, IRB, V, cast<IntegerType>(LI.getType()), Offset,
"extract");
return V;
}
@@ -2534,37 +2093,44 @@ private:
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
- uint64_t Size = EndOffset - BeginOffset;
+ // Compute the intersecting offset range.
+ assert(BeginOffset < NewAllocaEndOffset);
+ assert(EndOffset > NewAllocaBeginOffset);
+ uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
+ uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
+
+ uint64_t Size = NewEndOffset - NewBeginOffset;
Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
: LI.getType();
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
- V = rewriteVectorizedLoadInst();
+ V = rewriteVectorizedLoadInst(NewBeginOffset, NewEndOffset);
} else if (IntTy && LI.getType()->isIntegerTy()) {
- V = rewriteIntegerLoad(LI);
- } else if (BeginOffset == NewAllocaBeginOffset &&
- canConvertValue(TD, NewAllocaTy, LI.getType())) {
+ V = rewriteIntegerLoad(LI, NewBeginOffset, NewEndOffset);
+ } else if (NewBeginOffset == NewAllocaBeginOffset &&
+ canConvertValue(DL, NewAllocaTy, LI.getType())) {
V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
LI.isVolatile(), "load");
} else {
Type *LTy = TargetTy->getPointerTo();
- V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
- getPartitionTypeAlign(TargetTy),
- LI.isVolatile(), "load");
+ V = IRB.CreateAlignedLoad(
+ getAdjustedAllocaPtr(IRB, NewBeginOffset, LTy),
+ getOffsetTypeAlign(TargetTy, NewBeginOffset - NewAllocaBeginOffset),
+ LI.isVolatile(), "load");
IsPtrAdjusted = true;
}
- V = convertValue(TD, IRB, V, TargetTy);
+ V = convertValue(DL, IRB, V, TargetTy);
if (IsSplit) {
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
- assert(Size < TD.getTypeStoreSize(LI.getType()) &&
+ assert(Size < DL.getTypeStoreSize(LI.getType()) &&
"Split load isn't smaller than original load");
assert(LI.getType()->getIntegerBitWidth() ==
- TD.getTypeStoreSizeInBits(LI.getType()) &&
+ DL.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
@@ -2574,7 +2140,7 @@ private:
// LI only used for this computation.
Value *Placeholder
= new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
- V = insertInteger(TD, IRB, Placeholder, V, BeginOffset,
+ V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset,
"insert");
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
@@ -2589,24 +2155,26 @@ private:
return !LI.isVolatile() && !IsPtrAdjusted;
}
- bool rewriteVectorizedStoreInst(Value *V,
- StoreInst &SI, Value *OldOp) {
- unsigned BeginIndex = getIndex(BeginOffset);
- unsigned EndIndex = getIndex(EndOffset);
- assert(EndIndex > BeginIndex && "Empty vector!");
- unsigned NumElements = EndIndex - BeginIndex;
- assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
- Type *PartitionTy
- = (NumElements == 1) ? ElementTy
- : VectorType::get(ElementTy, NumElements);
- if (V->getType() != PartitionTy)
- V = convertValue(TD, IRB, V, PartitionTy);
-
- // Mix in the existing elements.
- Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- "load");
- V = insertVector(IRB, Old, V, BeginIndex, "vec");
+ bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
+ uint64_t NewBeginOffset,
+ uint64_t NewEndOffset) {
+ if (V->getType() != VecTy) {
+ unsigned BeginIndex = getIndex(NewBeginOffset);
+ unsigned EndIndex = getIndex(NewEndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+ Type *SliceTy =
+ (NumElements == 1) ? ElementTy
+ : VectorType::get(ElementTy, NumElements);
+ if (V->getType() != SliceTy)
+ V = convertValue(DL, IRB, V, SliceTy);
+ // Mix in the existing elements.
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "load");
+ V = insertVector(IRB, Old, V, BeginIndex, "vec");
+ }
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.insert(&SI);
@@ -2615,19 +2183,20 @@ private:
return true;
}
- bool rewriteIntegerStore(Value *V, StoreInst &SI) {
+ bool rewriteIntegerStore(Value *V, StoreInst &SI,
+ uint64_t NewBeginOffset, uint64_t NewEndOffset) {
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
- if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
+ if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"oldload");
- Old = convertValue(TD, IRB, Old, IntTy);
+ Old = convertValue(DL, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset,
+ V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset,
"insert");
}
- V = convertValue(TD, IRB, V, NewAllocaTy);
+ V = convertValue(DL, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.insert(&SI);
(void)Store;
@@ -2648,37 +2217,45 @@ private:
if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
Pass.PostPromotionWorklist.insert(AI);
- uint64_t Size = EndOffset - BeginOffset;
- if (Size < TD.getTypeStoreSize(V->getType())) {
+ // Compute the intersecting offset range.
+ assert(BeginOffset < NewAllocaEndOffset);
+ assert(EndOffset > NewAllocaBeginOffset);
+ uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
+ uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
+
+ uint64_t Size = NewEndOffset - NewBeginOffset;
+ if (Size < DL.getTypeStoreSize(V->getType())) {
assert(!SI.isVolatile());
- assert(IsSplit && "A seemingly split store isn't splittable");
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(V->getType()->getIntegerBitWidth() ==
- TD.getTypeStoreSizeInBits(V->getType()) &&
+ DL.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
- V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
+ V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset,
"extract");
}
if (VecTy)
- return rewriteVectorizedStoreInst(V, SI, OldOp);
+ return rewriteVectorizedStoreInst(V, SI, OldOp, NewBeginOffset,
+ NewEndOffset);
if (IntTy && V->getType()->isIntegerTy())
- return rewriteIntegerStore(V, SI);
+ return rewriteIntegerStore(V, SI, NewBeginOffset, NewEndOffset);
StoreInst *NewSI;
- if (BeginOffset == NewAllocaBeginOffset &&
- EndOffset == NewAllocaEndOffset &&
- canConvertValue(TD, V->getType(), NewAllocaTy)) {
- V = convertValue(TD, IRB, V, NewAllocaTy);
+ if (NewBeginOffset == NewAllocaBeginOffset &&
+ NewEndOffset == NewAllocaEndOffset &&
+ canConvertValue(DL, V->getType(), NewAllocaTy)) {
+ V = convertValue(DL, IRB, V, NewAllocaTy);
NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
SI.isVolatile());
} else {
- Value *NewPtr = getAdjustedAllocaPtr(IRB, V->getType()->getPointerTo());
- NewSI = IRB.CreateAlignedStore(V, NewPtr,
- getPartitionTypeAlign(V->getType()),
- SI.isVolatile());
+ Value *NewPtr = getAdjustedAllocaPtr(IRB, NewBeginOffset,
+ V->getType()->getPointerTo());
+ NewSI = IRB.CreateAlignedStore(
+ V, NewPtr, getOffsetTypeAlign(
+ V->getType(), NewBeginOffset - NewAllocaBeginOffset),
+ SI.isVolatile());
}
(void)NewSI;
Pass.DeadInsts.insert(&SI);
@@ -2729,9 +2306,12 @@ private:
// If the memset has a variable size, it cannot be split, just adjust the
// pointer to the new alloca.
if (!isa<Constant>(II.getLength())) {
- II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()));
+ assert(!IsSplit);
+ assert(BeginOffset >= NewAllocaBeginOffset);
+ II.setDest(
+ getAdjustedAllocaPtr(IRB, BeginOffset, II.getRawDest()->getType()));
Type *CstTy = II.getAlignmentCst()->getType();
- II.setAlignment(ConstantInt::get(CstTy, getPartitionAlign()));
+ II.setAlignment(ConstantInt::get(CstTy, getOffsetAlign(BeginOffset)));
deleteIfTriviallyDead(OldPtr);
return false;
@@ -2743,21 +2323,26 @@ private:
Type *AllocaTy = NewAI.getAllocatedType();
Type *ScalarTy = AllocaTy->getScalarType();
+ // Compute the intersecting offset range.
+ assert(BeginOffset < NewAllocaEndOffset);
+ assert(EndOffset > NewAllocaBeginOffset);
+ uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
+ uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
+ uint64_t SliceOffset = NewBeginOffset - NewAllocaBeginOffset;
+
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memset.
if (!VecTy && !IntTy &&
- (BeginOffset != NewAllocaBeginOffset ||
- EndOffset != NewAllocaEndOffset ||
+ (BeginOffset > NewAllocaBeginOffset ||
+ EndOffset < NewAllocaEndOffset ||
!AllocaTy->isSingleValueType() ||
- !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)) ||
- TD.getTypeSizeInBits(ScalarTy)%8 != 0)) {
+ !DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy)) ||
+ DL.getTypeSizeInBits(ScalarTy)%8 != 0)) {
Type *SizeTy = II.getLength()->getType();
- Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
- CallInst *New
- = IRB.CreateMemSet(getAdjustedAllocaPtr(IRB,
- II.getRawDest()->getType()),
- II.getValue(), Size, getPartitionAlign(),
- II.isVolatile());
+ Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
+ CallInst *New = IRB.CreateMemSet(
+ getAdjustedAllocaPtr(IRB, NewBeginOffset, II.getRawDest()->getType()),
+ II.getValue(), Size, getOffsetAlign(SliceOffset), II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return false;
@@ -2774,15 +2359,15 @@ private:
// If this is a memset of a vectorized alloca, insert it.
assert(ElementTy == ScalarTy);
- unsigned BeginIndex = getIndex(BeginOffset);
- unsigned EndIndex = getIndex(EndOffset);
+ unsigned BeginIndex = getIndex(NewBeginOffset);
+ unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
Value *Splat =
- getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8);
- Splat = convertValue(TD, IRB, Splat, ElementTy);
+ getIntegerSplat(II.getValue(), DL.getTypeSizeInBits(ElementTy) / 8);
+ Splat = convertValue(DL, IRB, Splat, ElementTy);
if (NumElements > 1)
Splat = getVectorSplat(Splat, NumElements);
@@ -2794,32 +2379,31 @@ private:
// set integer.
assert(!II.isVolatile());
- uint64_t Size = EndOffset - BeginOffset;
+ uint64_t Size = NewEndOffset - NewBeginOffset;
V = getIntegerSplat(II.getValue(), Size);
if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                  EndOffset != NewAllocaEndOffset)) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"oldload");
- Old = convertValue(TD, IRB, Old, IntTy);
- assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
- uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- V = insertInteger(TD, IRB, Old, V, Offset, "insert");
+ Old = convertValue(DL, IRB, Old, IntTy);
+ uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
+ V = insertInteger(DL, IRB, Old, V, Offset, "insert");
} else {
assert(V->getType() == IntTy &&
"Wrong type for an alloca wide integer!");
}
- V = convertValue(TD, IRB, V, AllocaTy);
+ V = convertValue(DL, IRB, V, AllocaTy);
} else {
// Established these invariants above.
- assert(BeginOffset == NewAllocaBeginOffset);
- assert(EndOffset == NewAllocaEndOffset);
+ assert(NewBeginOffset == NewAllocaBeginOffset);
+ assert(NewEndOffset == NewAllocaEndOffset);
- V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8);
+ V = getIntegerSplat(II.getValue(), DL.getTypeSizeInBits(ScalarTy) / 8);
if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
V = getVectorSplat(V, AllocaVecTy->getNumElements());
- V = convertValue(TD, IRB, V, AllocaTy);
+ V = convertValue(DL, IRB, V, AllocaTy);
}
Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
@@ -2835,21 +2419,25 @@ private:
DEBUG(dbgs() << " original: " << II << "\n");
+ // Compute the intersecting offset range.
+ assert(BeginOffset < NewAllocaEndOffset);
+ assert(EndOffset > NewAllocaBeginOffset);
+ uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
+ uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
+
assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
bool IsDest = II.getRawDest() == OldPtr;
- const AllocaPartitioning::MemTransferOffsets &MTO
- = P.getMemTransferOffsets(II);
-
// Compute the relative offset within the transfer.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
- APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
- : MTO.SourceBegin));
+ unsigned IntPtrWidth = DL.getPointerSizeInBits();
+ APInt RelOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
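+    // E.g. if the partition clamps a transfer that began at offset 8 so that
+    // the rewritten range begins at offset 12, RelOffset is 4 and the other
+    // pointer must be advanced by 4 bytes to stay in sync.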
unsigned Align = II.getAlignment();
+ uint64_t SliceOffset = NewBeginOffset - NewAllocaBeginOffset;
if (Align > 1)
- Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
- MinAlign(II.getAlignment(), getPartitionAlign()));
+ Align =
+ MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
+ MinAlign(II.getAlignment(), getOffsetAlign(SliceOffset)));
// For unsplit intrinsics, we simply modify the source and destination
// pointers in place. This isn't just an optimization, it is a matter of
@@ -2858,12 +2446,14 @@ private:
// a variable length. We may also be dealing with memmove instead of
  // memcpy, and so simply updating the pointers is all that is necessary to
// update both source and dest of a single call.
- if (!MTO.IsSplittable) {
+ if (!IsSplittable) {
Value *OldOp = IsDest ? II.getRawDest() : II.getRawSource();
if (IsDest)
- II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()));
+ II.setDest(
+ getAdjustedAllocaPtr(IRB, BeginOffset, II.getRawDest()->getType()));
else
- II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType()));
+ II.setSource(getAdjustedAllocaPtr(IRB, BeginOffset,
+ II.getRawSource()->getType()));
Type *CstTy = II.getAlignmentCst()->getType();
II.setAlignment(ConstantInt::get(CstTy, Align));
@@ -2881,24 +2471,21 @@ private:
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memcpy.
bool EmitMemCpy
- = !VecTy && !IntTy && (BeginOffset != NewAllocaBeginOffset ||
- EndOffset != NewAllocaEndOffset ||
+ = !VecTy && !IntTy && (BeginOffset > NewAllocaBeginOffset ||
+ EndOffset < NewAllocaEndOffset ||
!NewAI.getAllocatedType()->isSingleValueType());
// If we're just going to emit a memcpy, the alloca hasn't changed, and the
// size hasn't been shrunk based on analysis of the viable range, this is
// a no-op.
if (EmitMemCpy && &OldAI == &NewAI) {
- uint64_t OrigBegin = IsDest ? MTO.DestBegin : MTO.SourceBegin;
- uint64_t OrigEnd = IsDest ? MTO.DestEnd : MTO.SourceEnd;
// Ensure the start lines up.
- assert(BeginOffset == OrigBegin);
- (void)OrigBegin;
+ assert(NewBeginOffset == BeginOffset);
// Rewrite the size as needed.
- if (EndOffset != OrigEnd)
+ if (NewEndOffset != EndOffset)
II.setLength(ConstantInt::get(II.getLength()->getType(),
- EndOffset - BeginOffset));
+ NewEndOffset - NewBeginOffset));
return false;
}
// Record this instruction for deletion.
@@ -2917,13 +2504,13 @@ private:
// Compute the other pointer, folding as much as possible to produce
// a single, simple GEP in most cases.
- OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
+ OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy);
- Value *OurPtr
- = getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType()
- : II.getRawSource()->getType());
+ Value *OurPtr = getAdjustedAllocaPtr(
+ IRB, NewBeginOffset,
+ IsDest ? II.getRawDest()->getType() : II.getRawSource()->getType());
Type *SizeTy = II.getLength()->getType();
- Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
+ Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr,
IsDest ? OtherPtr : OurPtr,
@@ -2939,11 +2526,11 @@ private:
if (!Align)
Align = 1;
- bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
- EndOffset == NewAllocaEndOffset;
- uint64_t Size = EndOffset - BeginOffset;
- unsigned BeginIndex = VecTy ? getIndex(BeginOffset) : 0;
- unsigned EndIndex = VecTy ? getIndex(EndOffset) : 0;
+ bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
+ NewEndOffset == NewAllocaEndOffset;
+ uint64_t Size = NewEndOffset - NewBeginOffset;
+ unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
+ unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
unsigned NumElements = EndIndex - BeginIndex;
IntegerType *SubIntTy
= IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
@@ -2960,7 +2547,7 @@ private:
OtherPtrTy = SubIntTy->getPointerTo();
}
- Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
+ Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy);
Value *DstPtr = &NewAI;
if (!IsDest)
std::swap(SrcPtr, DstPtr);
@@ -2973,10 +2560,9 @@ private:
} else if (IntTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"load");
- Src = convertValue(TD, IRB, Src, IntTy);
- assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
- uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract");
+ Src = convertValue(DL, IRB, Src, IntTy);
+ uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
+ Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
} else {
Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
"copyload");
@@ -2989,11 +2575,10 @@ private:
} else if (IntTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"oldload");
- Old = convertValue(TD, IRB, Old, IntTy);
- assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
- uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- Src = insertInteger(TD, IRB, Old, Src, Offset, "insert");
- Src = convertValue(TD, IRB, Src, NewAllocaTy);
+ Old = convertValue(DL, IRB, Old, IntTy);
+ uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
+ Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
+ Src = convertValue(DL, IRB, Src, NewAllocaTy);
}
StoreInst *Store = cast<StoreInst>(
@@ -3009,13 +2594,20 @@ private:
DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getArgOperand(1) == OldPtr);
+ // Compute the intersecting offset range.
+ assert(BeginOffset < NewAllocaEndOffset);
+ assert(EndOffset > NewAllocaBeginOffset);
+ uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
+ uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
+
// Record this instruction for deletion.
Pass.DeadInsts.insert(&II);
ConstantInt *Size
= ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
- EndOffset - BeginOffset);
- Value *Ptr = getAdjustedAllocaPtr(IRB, II.getArgOperand(1)->getType());
+ NewEndOffset - NewBeginOffset);
+ Value *Ptr =
+ getAdjustedAllocaPtr(IRB, NewBeginOffset, II.getArgOperand(1)->getType());
Value *New;
if (II.getIntrinsicID() == Intrinsic::lifetime_start)
New = IRB.CreateLifetimeStart(Ptr, Size);
@@ -3029,30 +2621,45 @@ private:
bool visitPHINode(PHINode &PN) {
DEBUG(dbgs() << " original: " << PN << "\n");
+ assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
+ assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
// We would like to compute a new pointer in only one place, but have it be
// as local as possible to the PHI. To do that, we re-use the location of
// the old pointer, which necessarily must be in the right position to
// dominate the PHI.
- IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr));
+ IRBuilderTy PtrBuilder(OldPtr);
PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
".");
- Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType());
+ Value *NewPtr =
+ getAdjustedAllocaPtr(PtrBuilder, BeginOffset, OldPtr->getType());
// Replace the operands which were using the old pointer.
std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
DEBUG(dbgs() << " to: " << PN << "\n");
deleteIfTriviallyDead(OldPtr);
- return false;
+
+ // Check whether we can speculate this PHI node, and if so remember that
+ // fact and queue it up for another iteration after the speculation
+ // occurs.
+ if (isSafePHIToSpeculate(PN, &DL)) {
+ Pass.SpeculatablePHIs.insert(&PN);
+ IsUsedByRewrittenSpeculatableInstructions = true;
+ return true;
+ }
+
+ return false; // PHIs can't be promoted on their own.
}
bool visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
"Pointer isn't an operand!");
+ assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
+ assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
- Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType());
+ Value *NewPtr = getAdjustedAllocaPtr(IRB, BeginOffset, OldPtr->getType());
// Replace the operands which were using the old pointer.
if (SI.getOperand(1) == OldPtr)
SI.setOperand(1, NewPtr);
@@ -3061,7 +2668,17 @@ private:
DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
- return false;
+
+ // Check whether we can speculate this select instruction, and if so
+ // remember that fact and queue it up for another iteration after the
+ // speculation occurs.
+ if (isSafeSelectToSpeculate(SI, &DL)) {
+ Pass.SpeculatableSelects.insert(&SI);
+ IsUsedByRewrittenSpeculatableInstructions = true;
+ return true;
+ }
+
+ return false; // Selects can't be promoted on their own.
}
};
@@ -3077,7 +2694,7 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>;
- const DataLayout &TD;
+ const DataLayout &DL;
/// Queue of pointer uses to analyze and potentially rewrite.
SmallVector<Use *, 8> Queue;
@@ -3090,7 +2707,7 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
Use *U;
public:
- AggLoadStoreRewriter(const DataLayout &TD) : TD(TD) {}
+ AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {}
/// Rewrite loads and stores through a pointer and all pointers derived from
/// it.
@@ -3319,12 +2936,12 @@ static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
/// when the size or offset causes either end of the type-based partition to be off.
/// Also, this is a best-effort routine. It is reasonable to give up and not
/// return a type if necessary.
-static Type *getTypePartition(const DataLayout &TD, Type *Ty,
+static Type *getTypePartition(const DataLayout &DL, Type *Ty,
uint64_t Offset, uint64_t Size) {
- if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)
- return stripAggregateTypeWrapping(TD, Ty);
- if (Offset > TD.getTypeAllocSize(Ty) ||
- (TD.getTypeAllocSize(Ty) - Offset) < Size)
+ if (Offset == 0 && DL.getTypeAllocSize(Ty) == Size)
+ return stripAggregateTypeWrapping(DL, Ty);
+ if (Offset > DL.getTypeAllocSize(Ty) ||
+ (DL.getTypeAllocSize(Ty) - Offset) < Size)
return 0;
if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
@@ -3333,7 +2950,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
return 0;
Type *ElementTy = SeqTy->getElementType();
- uint64_t ElementSize = TD.getTypeAllocSize(ElementTy);
+ uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
uint64_t NumSkippedElements = Offset / ElementSize;
if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
if (NumSkippedElements >= ArrTy->getNumElements())
@@ -3350,12 +2967,12 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
if ((Offset + Size) > ElementSize)
return 0;
// Recurse through the element type trying to peel off offset bytes.
- return getTypePartition(TD, ElementTy, Offset, Size);
+ return getTypePartition(DL, ElementTy, Offset, Size);
}
assert(Offset == 0);
if (Size == ElementSize)
- return stripAggregateTypeWrapping(TD, ElementTy);
+ return stripAggregateTypeWrapping(DL, ElementTy);
assert(Size > ElementSize);
uint64_t NumElements = Size / ElementSize;
if (NumElements * ElementSize != Size)
@@ -3367,7 +2984,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
if (!STy)
return 0;
- const StructLayout *SL = TD.getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
if (Offset >= SL->getSizeInBytes())
return 0;
uint64_t EndOffset = Offset + Size;
@@ -3378,7 +2995,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
Offset -= SL->getElementOffset(Index);
Type *ElementTy = STy->getElementType(Index);
- uint64_t ElementSize = TD.getTypeAllocSize(ElementTy);
+ uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
if (Offset >= ElementSize)
return 0; // The offset points into alignment padding.
@@ -3386,12 +3003,12 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
if (Offset > 0 || Size < ElementSize) {
if ((Offset + Size) > ElementSize)
return 0;
- return getTypePartition(TD, ElementTy, Offset, Size);
+ return getTypePartition(DL, ElementTy, Offset, Size);
}
assert(Offset == 0);
if (Size == ElementSize)
- return stripAggregateTypeWrapping(TD, ElementTy);
+ return stripAggregateTypeWrapping(DL, ElementTy);
StructType::element_iterator EI = STy->element_begin() + Index,
EE = STy->element_end();
@@ -3414,7 +3031,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
// Try to build up a sub-structure.
StructType *SubTy = StructType::get(STy->getContext(), makeArrayRef(EI, EE),
STy->isPacked());
- const StructLayout *SubSL = TD.getStructLayout(SubTy);
+ const StructLayout *SubSL = DL.getStructLayout(SubTy);
if (Size != SubSL->getSizeInBytes())
return 0; // The sub-struct doesn't have quite the size needed.
@@ -3431,113 +3048,280 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
/// appropriate new offsets. It also evaluates how successful the rewrite was
/// at enabling promotion and if it was successful queues the alloca to be
/// promoted.
-bool SROA::rewriteAllocaPartition(AllocaInst &AI,
- AllocaPartitioning &P,
- AllocaPartitioning::iterator PI) {
- uint64_t AllocaSize = PI->EndOffset - PI->BeginOffset;
- bool IsLive = false;
- for (AllocaPartitioning::use_iterator UI = P.use_begin(PI),
- UE = P.use_end(PI);
- UI != UE && !IsLive; ++UI)
- if (UI->getUse())
- IsLive = true;
- if (!IsLive)
- return false; // No live uses left of this partition.
-
- DEBUG(dbgs() << "Speculating PHIs and selects in partition "
- << "[" << PI->BeginOffset << "," << PI->EndOffset << ")\n");
-
- PHIOrSelectSpeculator Speculator(*TD, P, *this);
- DEBUG(dbgs() << " speculating ");
- DEBUG(P.print(dbgs(), PI, ""));
- Speculator.visitUsers(PI);
+bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
+ AllocaSlices::iterator B, AllocaSlices::iterator E,
+ int64_t BeginOffset, int64_t EndOffset,
+ ArrayRef<AllocaSlices::iterator> SplitUses) {
+ assert(BeginOffset < EndOffset);
+ uint64_t SliceSize = EndOffset - BeginOffset;
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
- Type *AllocaTy = 0;
- if (Type *PartitionTy = P.getCommonType(PI))
- if (TD->getTypeAllocSize(PartitionTy) >= AllocaSize)
- AllocaTy = PartitionTy;
- if (!AllocaTy)
- if (Type *PartitionTy = getTypePartition(*TD, AI.getAllocatedType(),
- PI->BeginOffset, AllocaSize))
- AllocaTy = PartitionTy;
- if ((!AllocaTy ||
- (AllocaTy->isArrayTy() &&
- AllocaTy->getArrayElementType()->isIntegerTy())) &&
- TD->isLegalInteger(AllocaSize * 8))
- AllocaTy = Type::getIntNTy(*C, AllocaSize * 8);
- if (!AllocaTy)
- AllocaTy = ArrayType::get(Type::getInt8Ty(*C), AllocaSize);
- assert(TD->getTypeAllocSize(AllocaTy) >= AllocaSize);
+ Type *SliceTy = 0;
+ if (Type *CommonUseTy = findCommonType(B, E, EndOffset))
+ if (DL->getTypeAllocSize(CommonUseTy) >= SliceSize)
+ SliceTy = CommonUseTy;
+ if (!SliceTy)
+ if (Type *TypePartitionTy = getTypePartition(*DL, AI.getAllocatedType(),
+ BeginOffset, SliceSize))
+ SliceTy = TypePartitionTy;
+ if ((!SliceTy || (SliceTy->isArrayTy() &&
+ SliceTy->getArrayElementType()->isIntegerTy())) &&
+ DL->isLegalInteger(SliceSize * 8))
+ SliceTy = Type::getIntNTy(*C, SliceSize * 8);
+ if (!SliceTy)
+ SliceTy = ArrayType::get(Type::getInt8Ty(*C), SliceSize);
+ assert(DL->getTypeAllocSize(SliceTy) >= SliceSize);
+
+ bool IsVectorPromotable = isVectorPromotionViable(
+ *DL, SliceTy, S, BeginOffset, EndOffset, B, E, SplitUses);
+
+ bool IsIntegerPromotable =
+ !IsVectorPromotable &&
+ isIntegerWideningViable(*DL, SliceTy, BeginOffset, S, B, E, SplitUses);
// Check for the case where we're going to rewrite to a new alloca of the
// exact same type as the original, and with the same access offsets. In that
// case, re-use the existing alloca, but still run through the rewriter to
// perform phi and select speculation.
AllocaInst *NewAI;
- if (AllocaTy == AI.getAllocatedType()) {
- assert(PI->BeginOffset == 0 &&
+ if (SliceTy == AI.getAllocatedType()) {
+ assert(BeginOffset == 0 &&
"Non-zero begin offset but same alloca type");
- assert(PI == P.begin() && "Begin offset is zero on later partition");
NewAI = &AI;
+ // FIXME: We should be able to bail at this point with "nothing changed".
+ // FIXME: We might want to defer PHI speculation until after here.
} else {
unsigned Alignment = AI.getAlignment();
if (!Alignment) {
// The minimum alignment which users can rely on when the explicit
// alignment is omitted or zero is that required by the ABI for this
// type.
- Alignment = TD->getABITypeAlignment(AI.getAllocatedType());
+ Alignment = DL->getABITypeAlignment(AI.getAllocatedType());
}
- Alignment = MinAlign(Alignment, PI->BeginOffset);
+ Alignment = MinAlign(Alignment, BeginOffset);
// If we will get at least this much alignment from the type alone, leave
// the alloca's alignment unconstrained.
- if (Alignment <= TD->getABITypeAlignment(AllocaTy))
+ if (Alignment <= DL->getABITypeAlignment(SliceTy))
Alignment = 0;
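+      // E.g. an alloca aligned to 8 with a slice beginning at offset 4 can
+      // only guarantee MinAlign(8, 4) == 4 bytes of alignment for the new
+      // alloca.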
- NewAI = new AllocaInst(AllocaTy, 0, Alignment,
- AI.getName() + ".sroa." + Twine(PI - P.begin()),
- &AI);
+ NewAI = new AllocaInst(SliceTy, 0, Alignment,
+ AI.getName() + ".sroa." + Twine(B - S.begin()), &AI);
++NumNewAllocas;
}
DEBUG(dbgs() << "Rewriting alloca partition "
- << "[" << PI->BeginOffset << "," << PI->EndOffset << ") to: "
- << *NewAI << "\n");
+ << "[" << BeginOffset << "," << EndOffset << ") to: " << *NewAI
+ << "\n");
- // Track the high watermark of the post-promotion worklist. We will reset it
- // to this point if the alloca is not in fact scheduled for promotion.
+ // Track the high watermark on several worklists that are only relevant for
+ // promoted allocas. We will reset it to this point if the alloca is not in
+ // fact scheduled for promotion.
unsigned PPWOldSize = PostPromotionWorklist.size();
+ unsigned SPOldSize = SpeculatablePHIs.size();
+ unsigned SSOldSize = SpeculatableSelects.size();
+ unsigned NumUses = 0;
+
+ AllocaSliceRewriter Rewriter(*DL, S, *this, AI, *NewAI, BeginOffset,
+ EndOffset, IsVectorPromotable,
+ IsIntegerPromotable);
+ bool Promotable = true;
+ for (ArrayRef<AllocaSlices::iterator>::const_iterator SUI = SplitUses.begin(),
+ SUE = SplitUses.end();
+ SUI != SUE; ++SUI) {
+ DEBUG(dbgs() << " rewriting split ");
+ DEBUG(S.printSlice(dbgs(), *SUI, ""));
+ Promotable &= Rewriter.visit(*SUI);
+ ++NumUses;
+ }
+ for (AllocaSlices::iterator I = B; I != E; ++I) {
+ DEBUG(dbgs() << " rewriting ");
+ DEBUG(S.printSlice(dbgs(), I, ""));
+ Promotable &= Rewriter.visit(I);
+ ++NumUses;
+ }
+
+ NumAllocaPartitionUses += NumUses;
+ MaxUsesPerAllocaPartition =
+ std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);
- AllocaPartitionRewriter Rewriter(*TD, P, PI, *this, AI, *NewAI,
- PI->BeginOffset, PI->EndOffset);
- DEBUG(dbgs() << " rewriting ");
- DEBUG(P.print(dbgs(), PI, ""));
- bool Promotable = Rewriter.visitUsers(P.use_begin(PI), P.use_end(PI));
- if (Promotable) {
+ if (Promotable && !Rewriter.isUsedByRewrittenSpeculatableInstructions()) {
DEBUG(dbgs() << " and queuing for promotion\n");
PromotableAllocas.push_back(NewAI);
- } else if (NewAI != &AI) {
+ } else if (NewAI != &AI ||
+ (Promotable &&
+ Rewriter.isUsedByRewrittenSpeculatableInstructions())) {
// If we can't promote the alloca, iterate on it to check for new
// refinements exposed by splitting the current alloca. Don't iterate on an
// alloca which didn't actually change and didn't get promoted.
+ //
+ // Alternatively, if we could promote the alloca but have speculatable
+ // instructions then we will speculate them after finishing our processing
+ // of the original alloca. Mark the new one for re-visiting in the next
+ // iteration so the speculated operations can be rewritten.
+ //
+ // FIXME: We should actually track whether the rewriter changed anything.
Worklist.insert(NewAI);
}
// Drop any post-promotion work items if promotion didn't happen.
- if (!Promotable)
+ if (!Promotable) {
while (PostPromotionWorklist.size() > PPWOldSize)
PostPromotionWorklist.pop_back();
+ while (SpeculatablePHIs.size() > SPOldSize)
+ SpeculatablePHIs.pop_back();
+ while (SpeculatableSelects.size() > SSOldSize)
+ SpeculatableSelects.pop_back();
+ }
return true;
}
-/// \brief Walks the partitioning of an alloca rewriting uses of each partition.
-bool SROA::splitAlloca(AllocaInst &AI, AllocaPartitioning &P) {
+namespace {
+struct IsSliceEndLessOrEqualTo {
+ uint64_t UpperBound;
+
+ IsSliceEndLessOrEqualTo(uint64_t UpperBound) : UpperBound(UpperBound) {}
+
+ bool operator()(const AllocaSlices::iterator &I) {
+ return I->endOffset() <= UpperBound;
+ }
+};
+}
+
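+/// \brief Drop any split uses which have ended by the given offset.
+///
+/// For example (illustrative): given split uses ending at offsets 8 and 16
+/// and \p Offset = 12, the first is erased and MaxSplitUseEndOffset is
+/// recomputed to 16.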
+static void
+removeFinishedSplitUses(SmallVectorImpl<AllocaSlices::iterator> &SplitUses,
+ uint64_t &MaxSplitUseEndOffset, uint64_t Offset) {
+ if (Offset >= MaxSplitUseEndOffset) {
+ SplitUses.clear();
+ MaxSplitUseEndOffset = 0;
+ return;
+ }
+
+ size_t SplitUsesOldSize = SplitUses.size();
+ SplitUses.erase(std::remove_if(SplitUses.begin(), SplitUses.end(),
+ IsSliceEndLessOrEqualTo(Offset)),
+ SplitUses.end());
+ if (SplitUsesOldSize == SplitUses.size())
+ return;
+
+ // Recompute the max. While this is linear, so is remove_if.
+ MaxSplitUseEndOffset = 0;
+ for (SmallVectorImpl<AllocaSlices::iterator>::iterator
+ SUI = SplitUses.begin(),
+ SUE = SplitUses.end();
+ SUI != SUE; ++SUI)
+ MaxSplitUseEndOffset = std::max((*SUI)->endOffset(), MaxSplitUseEndOffset);
+}
+
+/// \brief Walks the slices of an alloca and form partitions based on them,
+/// rewriting each of their uses.
+bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
+ if (S.begin() == S.end())
+ return false;
+
+ unsigned NumPartitions = 0;
bool Changed = false;
- for (AllocaPartitioning::iterator PI = P.begin(), PE = P.end(); PI != PE;
- ++PI)
- Changed |= rewriteAllocaPartition(AI, P, PI);
+ SmallVector<AllocaSlices::iterator, 4> SplitUses;
+ uint64_t MaxSplitUseEndOffset = 0;
+
+ uint64_t BeginOffset = S.begin()->beginOffset();
+
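+  // As an illustration of the walk below: given a splittable slice [0,16)
+  // and an unsplittable slice [4,8), we form three partitions: [0,4) and
+  // [8,16), each rewritten with the splittable slice as a split use, and
+  // [4,8), which contains the unsplittable slice.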
+ for (AllocaSlices::iterator SI = S.begin(), SJ = llvm::next(SI), SE = S.end();
+ SI != SE; SI = SJ) {
+ uint64_t MaxEndOffset = SI->endOffset();
+
+ if (!SI->isSplittable()) {
+ // When we're forming an unsplittable region, it must always start at the
+ // first slice and will extend through its end.
+ assert(BeginOffset == SI->beginOffset());
+
+ // Form a partition including all of the overlapping slices with this
+ // unsplittable slice.
+ while (SJ != SE && SJ->beginOffset() < MaxEndOffset) {
+ if (!SJ->isSplittable())
+ MaxEndOffset = std::max(MaxEndOffset, SJ->endOffset());
+ ++SJ;
+ }
+ } else {
+ assert(SI->isSplittable()); // Established above.
+
+ // Collect all of the overlapping splittable slices.
+ while (SJ != SE && SJ->beginOffset() < MaxEndOffset &&
+ SJ->isSplittable()) {
+ MaxEndOffset = std::max(MaxEndOffset, SJ->endOffset());
+ ++SJ;
+ }
+
+ // Back up MaxEndOffset and SJ if we ended the span early when
+ // encountering an unsplittable slice.
+ if (SJ != SE && SJ->beginOffset() < MaxEndOffset) {
+ assert(!SJ->isSplittable());
+ MaxEndOffset = SJ->beginOffset();
+ }
+ }
+
+ // Check if we have managed to move the end offset forward yet. If so,
+ // we'll have to rewrite uses and erase old split uses.
+ if (BeginOffset < MaxEndOffset) {
+ // Rewrite a sequence of overlapping slices.
+ Changed |=
+ rewritePartition(AI, S, SI, SJ, BeginOffset, MaxEndOffset, SplitUses);
+ ++NumPartitions;
+
+ removeFinishedSplitUses(SplitUses, MaxSplitUseEndOffset, MaxEndOffset);
+ }
+
+ // Accumulate all the splittable slices from the [SI,SJ) region which
+ // overlap going forward.
+ for (AllocaSlices::iterator SK = SI; SK != SJ; ++SK)
+ if (SK->isSplittable() && SK->endOffset() > MaxEndOffset) {
+ SplitUses.push_back(SK);
+ MaxSplitUseEndOffset = std::max(SK->endOffset(), MaxSplitUseEndOffset);
+ }
+
+ // If we're already at the end and we have no split uses, we're done.
+ if (SJ == SE && SplitUses.empty())
+ break;
+
+ // If we have no split uses or no gap in offsets, we're ready to move to
+ // the next slice.
+ if (SplitUses.empty() || (SJ != SE && MaxEndOffset == SJ->beginOffset())) {
+ BeginOffset = SJ->beginOffset();
+ continue;
+ }
+
+ // Even if we have split slices, if the next slice is splittable and the
+ // split slices reach it, we can simply set up the beginning offset of the
+ // next iteration to bridge between them.
+ if (SJ != SE && SJ->isSplittable() &&
+ MaxSplitUseEndOffset > SJ->beginOffset()) {
+ BeginOffset = MaxEndOffset;
+ continue;
+ }
+
+ // Otherwise, we have a tail of split slices. Rewrite them with an empty
+ // range of slices.
+ uint64_t PostSplitEndOffset =
+ SJ == SE ? MaxSplitUseEndOffset : SJ->beginOffset();
+
+ Changed |= rewritePartition(AI, S, SJ, SJ, MaxEndOffset, PostSplitEndOffset,
+ SplitUses);
+ ++NumPartitions;
+
+ if (SJ == SE)
+ break; // Skip the rest, we don't need to do any cleanup.
+
+ removeFinishedSplitUses(SplitUses, MaxSplitUseEndOffset,
+ PostSplitEndOffset);
+
+ // Now just reset the begin offset for the next iteration.
+ BeginOffset = SJ->beginOffset();
+ }
+
+ NumAllocaPartitions += NumPartitions;
+ MaxPartitionsPerAlloca =
+ std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
return Changed;
}
@@ -3545,7 +3329,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaPartitioning &P) {
/// \brief Analyze an alloca for SROA.
///
/// This analyzes the alloca to ensure we can reason about it, builds
-/// a partitioning of the alloca, and then hands it off to be split and
+/// the slices of the alloca, and then hands it off to be split and
/// rewritten as needed.
bool SROA::runOnAlloca(AllocaInst &AI) {
DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
@@ -3559,32 +3343,32 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
// Skip alloca forms that this analysis can't handle.
if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
- TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
+ DL->getTypeAllocSize(AI.getAllocatedType()) == 0)
return false;
bool Changed = false;
// First, split any FCA loads and stores touching this alloca to promote
// better splitting and promotion opportunities.
- AggLoadStoreRewriter AggRewriter(*TD);
+ AggLoadStoreRewriter AggRewriter(*DL);
Changed |= AggRewriter.rewrite(AI);
- // Build the partition set using a recursive instruction-visiting builder.
- AllocaPartitioning P(*TD, AI);
- DEBUG(P.print(dbgs()));
- if (P.isEscaped())
+ // Build the slices using a recursive instruction-visiting builder.
+ AllocaSlices S(*DL, AI);
+ DEBUG(S.print(dbgs()));
+ if (S.isEscaped())
return Changed;
// Delete all the dead users of this alloca before splitting and rewriting it.
- for (AllocaPartitioning::dead_user_iterator DI = P.dead_user_begin(),
- DE = P.dead_user_end();
+ for (AllocaSlices::dead_user_iterator DI = S.dead_user_begin(),
+ DE = S.dead_user_end();
DI != DE; ++DI) {
Changed = true;
(*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
DeadInsts.insert(*DI);
}
- for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(),
- DE = P.dead_op_end();
+ for (AllocaSlices::dead_op_iterator DO = S.dead_op_begin(),
+ DE = S.dead_op_end();
DO != DE; ++DO) {
Value *OldV = **DO;
// Clobber the use with an undef value.
@@ -3596,11 +3380,21 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
}
}
- // No partitions to split. Leave the dead alloca for a later pass to clean up.
- if (P.begin() == P.end())
+ // No slices to split. Leave the dead alloca for a later pass to clean up.
+ if (S.begin() == S.end())
return Changed;
- return splitAlloca(AI, P) || Changed;
+ Changed |= splitAlloca(AI, S);
+
+ DEBUG(dbgs() << " Speculating PHIs\n");
+ while (!SpeculatablePHIs.empty())
+ speculatePHINodeLoads(*SpeculatablePHIs.pop_back_val());
+
+ DEBUG(dbgs() << " Speculating Selects\n");
+ while (!SpeculatableSelects.empty())
+ speculateSelectInstLoads(*SpeculatableSelects.pop_back_val());
+
+ return Changed;
}
/// \brief Delete the dead instructions accumulated in this run.
@@ -3635,6 +3429,15 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
}
}
+static void enqueueUsersInWorklist(Instruction &I,
+ SmallVectorImpl<Instruction *> &Worklist,
+ SmallPtrSet<Instruction *, 8> &Visited) {
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
+ ++UI)
+ if (Visited.insert(cast<Instruction>(*UI)))
+ Worklist.push_back(cast<Instruction>(*UI));
+}
+
/// \brief Promote the allocas, using the best available technique.
///
/// This attempts to promote whatever allocas have been identified as viable in
@@ -3659,25 +3462,28 @@ bool SROA::promoteAllocas(Function &F) {
DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
SSAUpdater SSA;
DIBuilder DIB(*F.getParent());
- SmallVector<Instruction*, 64> Insts;
+ SmallVector<Instruction *, 64> Insts;
+
+ // We need a worklist to walk the uses of each alloca.
+ SmallVector<Instruction *, 8> Worklist;
+ SmallPtrSet<Instruction *, 8> Visited;
+ SmallVector<Instruction *, 32> DeadInsts;
for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
AllocaInst *AI = PromotableAllocas[Idx];
- for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
- UI != UE;) {
- Instruction *I = cast<Instruction>(*UI++);
+ Insts.clear();
+ Worklist.clear();
+ Visited.clear();
+
+ enqueueUsersInWorklist(*AI, Worklist, Visited);
+
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+
// FIXME: Currently the SSAUpdater infrastructure doesn't reason about
// lifetime intrinsics and so we strip them (and the bitcasts+GEPs
// leading to them) here. Eventually it should use them to optimize the
// scalar values produced.
- if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
- assert(onlyUsedByLifetimeMarkers(I) &&
- "Found a bitcast used outside of a lifetime marker.");
- while (!I->use_empty())
- cast<Instruction>(*I->use_begin())->eraseFromParent();
- I->eraseFromParent();
- continue;
- }
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end);
@@ -3685,10 +3491,30 @@ bool SROA::promoteAllocas(Function &F) {
continue;
}
- Insts.push_back(I);
+ // Push the loads and stores we find onto the list. SROA will already
+ // have validated that all loads and stores are viable candidates for
+ // promotion.
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ assert(LI->getType() == AI->getAllocatedType());
+ Insts.push_back(LI);
+ continue;
+ }
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ assert(SI->getValueOperand()->getType() == AI->getAllocatedType());
+ Insts.push_back(SI);
+ continue;
+ }
+
+ // For everything else, we know that only no-op bitcasts and GEPs will
+ // make it this far, just recurse through them and recall them for later
+ // removal.
+ DeadInsts.push_back(I);
+ enqueueUsersInWorklist(*I, Worklist, Visited);
}
AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
- Insts.clear();
+ while (!DeadInsts.empty())
+ DeadInsts.pop_back_val()->eraseFromParent();
+ AI->eraseFromParent();
}
PromotableAllocas.clear();
@@ -3712,8 +3538,8 @@ namespace {
bool SROA::runOnFunction(Function &F) {
DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
- TD = getAnalysisIfAvailable<DataLayout>();
- if (!TD) {
+ DL = getAnalysisIfAvailable<DataLayout>();
+ if (!DL) {
DEBUG(dbgs() << " Skipping SROA -- no target data!\n");
return false;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp b/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
new file mode 100644
index 0000000..9bcd702
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
@@ -0,0 +1,479 @@
+//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SampleProfileLoader transformation. This pass
+// reads a profile file generated by a sampling profiler (e.g. Linux Perf -
+// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
+// profile information in the given profile.
+//
+// This pass generates branch weight annotations on the IR:
+//
+// - prof: Represents branch weights. This annotation is added to branches
+// to indicate the weights of each edge coming out of the branch.
+// The weight of each edge is the weight of the target block for
+// that edge. The weight of a block B is computed as the maximum
+// number of samples found in B.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sample-profile"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+// Command line option to specify the file to read samples from. This is
+// mainly used for debugging.
+static cl::opt<std::string> SampleProfileFile(
+ "sample-profile-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
+
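+// For example, with a (hypothetical) profile file foo.prof in the text
+// format parsed below, one might run:
+//
+//   opt -sample-profile -sample-profile-file=foo.prof input.bc -o output.bc
+//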
+namespace {
+/// \brief Sample-based profile reader.
+///
+/// Each profile contains sample counts for all the functions
+/// executed. Inside each function, statements are annotated with the
+/// collected samples on all the instructions associated with that
+/// statement.
+///
+/// For this to produce meaningful data, the program needs to be
+/// compiled with some debug information (at minimum, line numbers:
+/// -gline-tables-only). Otherwise, it will be impossible to match IR
+/// instructions to the line numbers collected by the profiler.
+///
+/// From the profile file, we are interested in collecting the
+/// following information:
+///
+/// * A list of functions included in the profile (mangled names).
+///
+/// * For each function F:
+/// 1. The total number of samples collected in F.
+///
+/// 2. The samples collected at each line in F. To provide some
+/// protection against source code shuffling, line numbers should
+/// be relative to the start of the function.
+class SampleProfile {
+public:
+ SampleProfile(StringRef F) : Profiles(0), Filename(F) {}
+
+ void dump();
+ void loadText();
+ void loadNative() { llvm_unreachable("not implemented"); }
+ bool emitAnnotations(Function &F);
+ void printFunctionProfile(raw_ostream &OS, StringRef FName);
+ void dumpFunctionProfile(StringRef FName);
+
+protected:
+ typedef DenseMap<uint32_t, uint32_t> BodySampleMap;
+ typedef DenseMap<BasicBlock *, uint32_t> BlockWeightMap;
+
+ /// \brief Representation of the runtime profile for a function.
+ ///
+ /// This data structure contains the runtime profile for a given
+ /// function. It contains the total number of samples collected
+ /// in the function and a map of samples collected in every statement.
+ struct FunctionProfile {
+ /// \brief Total number of samples collected inside this function.
+ ///
+ /// Samples are cumulative, they include all the samples collected
+ /// inside this function and all its inlined callees.
+ unsigned TotalSamples;
+
+    /// \brief Total number of samples collected at the head of the function.
+ unsigned TotalHeadSamples;
+
+ /// \brief Map line offsets to collected samples.
+ ///
+ /// Each entry in this map contains the number of samples
+ /// collected at the corresponding line offset. All line locations
+ /// are an offset from the start of the function.
+ BodySampleMap BodySamples;
+
+ /// \brief Map basic blocks to their computed weights.
+ ///
+ /// The weight of a basic block is defined to be the maximum
+ /// of all the instruction weights in that block.
+ BlockWeightMap BlockWeights;
+ };
+
+ uint32_t getInstWeight(Instruction &I, unsigned FirstLineno,
+ BodySampleMap &BodySamples);
+ uint32_t computeBlockWeight(BasicBlock *B, unsigned FirstLineno,
+ BodySampleMap &BodySamples);
+
+ /// \brief Map every function to its associated profile.
+ ///
+ /// The profile of every function executed at runtime is collected
+ /// in the structure FunctionProfile. This maps function objects
+ /// to their corresponding profiles.
+ StringMap<FunctionProfile> Profiles;
+
+ /// \brief Path name to the file holding the profile data.
+ ///
+ /// The format of this file is defined by each profiler
+ /// independently. If possible, the profiler should have a text
+ /// version of the profile format to be used in constructing test
+ /// cases and debugging.
+ StringRef Filename;
+};
+
+/// \brief Loader class for text-based profiles.
+///
+/// This class defines a simple interface to read text files containing
+/// profiles. It keeps track of line number information and location of
+/// the file pointer. Users of this class are responsible for actually
+/// parsing the lines returned by the readLine function.
+///
+/// TODO - This does not really belong here. It is a generic text file
+/// reader. It should be moved to the Support library and made more general.
+class ExternalProfileTextLoader {
+public:
+ ExternalProfileTextLoader(StringRef F) : Filename(F) {
+ error_code EC;
+ EC = MemoryBuffer::getFile(Filename, Buffer);
+ if (EC)
+ report_fatal_error("Could not open profile file " + Filename + ": " +
+ EC.message());
+ FP = Buffer->getBufferStart();
+ Lineno = 0;
+ }
+
+ /// \brief Read a line from the mapped file.
+ StringRef readLine() {
+ size_t Length = 0;
+ const char *start = FP;
+ while (FP != Buffer->getBufferEnd() && *FP != '\n') {
+ Length++;
+ FP++;
+ }
+ if (FP != Buffer->getBufferEnd())
+ FP++;
+ Lineno++;
+ return StringRef(start, Length);
+ }
+
+  /// \brief Return true if we've reached EOF.
+ bool atEOF() const { return FP == Buffer->getBufferEnd(); }
+
+ /// \brief Report a parse error message and stop compilation.
+ void reportParseError(Twine Msg) const {
+ report_fatal_error(Filename + ":" + Twine(Lineno) + ": " + Msg + "\n");
+ }
+
+private:
+ /// \brief Memory buffer holding the text file.
+ OwningPtr<MemoryBuffer> Buffer;
+
+ /// \brief Current position into the memory buffer.
+ const char *FP;
+
+ /// \brief Current line number.
+ int64_t Lineno;
+
+  /// \brief Path name of the profile file.
+ StringRef Filename;
+};
+
+/// \brief Sample profile pass.
+///
+/// This pass reads profile data from the file specified by
+/// -sample-profile-file and annotates every affected function with the
+/// profile information found in that file.
+class SampleProfileLoader : public FunctionPass {
+public:
+ // Class identification, replacement for typeinfo
+ static char ID;
+
+ SampleProfileLoader(StringRef Name = SampleProfileFile)
+ : FunctionPass(ID), Profiler(0), Filename(Name) {
+ initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool doInitialization(Module &M);
+
+ void dump() { Profiler->dump(); }
+
+ virtual const char *getPassName() const { return "Sample profile pass"; }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+
+protected:
+ /// \brief Profile reader object.
+ OwningPtr<SampleProfile> Profiler;
+
+ /// \brief Name of the profile file to load.
+ StringRef Filename;
+};
+}
+
+/// \brief Print the function profile for \p FName on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param FName Name of the function to print.
+void SampleProfile::printFunctionProfile(raw_ostream &OS, StringRef FName) {
+ FunctionProfile FProfile = Profiles[FName];
+ OS << "Function: " << FName << ", " << FProfile.TotalSamples << ", "
+ << FProfile.TotalHeadSamples << ", " << FProfile.BodySamples.size()
+ << " sampled lines\n";
+ for (BodySampleMap::const_iterator SI = FProfile.BodySamples.begin(),
+ SE = FProfile.BodySamples.end();
+ SI != SE; ++SI)
+ OS << "\tline offset: " << SI->first
+ << ", number of samples: " << SI->second << "\n";
+ OS << "\n";
+}
+
+/// \brief Dump the function profile for \p FName.
+///
+/// \param FName Name of the function to print.
+void SampleProfile::dumpFunctionProfile(StringRef FName) {
+ printFunctionProfile(dbgs(), FName);
+}
+
+/// \brief Dump all the function profiles found.
+void SampleProfile::dump() {
+ for (StringMap<FunctionProfile>::const_iterator I = Profiles.begin(),
+ E = Profiles.end();
+ I != E; ++I)
+ dumpFunctionProfile(I->getKey());
+}
+
+/// \brief Load samples from a text file.
+///
+/// The file is divided in two segments:
+///
+/// Symbol table (represented with the string "symbol table")
+/// Number of symbols in the table
+/// symbol 1
+/// symbol 2
+/// ...
+/// symbol N
+///
+/// Function body profiles
+/// function1:total_samples:total_head_samples:number_of_locations
+/// location_offset_1: number_of_samples
+/// location_offset_2: number_of_samples
+/// ...
+/// location_offset_N: number_of_samples
+///
+/// Function names must be mangled in order for the profile loader to
+/// match them in the current translation unit.
+///
+/// Since this is a flat profile, a function that shows up more than
+/// once gets all its samples aggregated across all its instances.
+/// TODO - flat profiles are too imprecise to provide good optimization
+/// opportunities. Convert them to context-sensitive profiles.
+///
+/// This textual representation is useful to generate unit tests and
+/// for debugging purposes, but it should not be used to generate
+/// profiles for large programs, as the representation is extremely
+/// inefficient.
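+///
+/// As an illustration, a minimal profile for a single (hypothetical)
+/// function "main" with two sampled line offsets could look like:
+///
+/// symbol table
+/// 1
+/// main
+/// main:10000:0:2
+/// 1: 100
+/// 2: 9900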
+void SampleProfile::loadText() {
+ ExternalProfileTextLoader Loader(Filename);
+
+ // Read the symbol table.
+ StringRef Line = Loader.readLine();
+ if (Line != "symbol table")
+ Loader.reportParseError("Expected 'symbol table', found " + Line);
+ int NumSymbols;
+ Line = Loader.readLine();
+ if (Line.getAsInteger(10, NumSymbols))
+ Loader.reportParseError("Expected a number, found " + Line);
+ for (int I = 0; I < NumSymbols; I++) {
+ StringRef FName = Loader.readLine();
+ FunctionProfile &FProfile = Profiles[FName];
+ FProfile.BodySamples.clear();
+ FProfile.TotalSamples = 0;
+ FProfile.TotalHeadSamples = 0;
+ }
+
+ // Read the profile of each function. Since each function may be
+ // mentioned more than once, and we are collecting flat profiles,
+ // accumulate samples as we parse them.
+ Regex HeadRE("^([^:]+):([0-9]+):([0-9]+):([0-9]+)$");
+ Regex LineSample("^([0-9]+): ([0-9]+)$");
+ while (!Loader.atEOF()) {
+ SmallVector<StringRef, 4> Matches;
+ Line = Loader.readLine();
+ if (!HeadRE.match(Line, &Matches))
+ Loader.reportParseError("Expected 'mangled_name:NUM:NUM:NUM', found " +
+ Line);
+ assert(Matches.size() == 5);
+ StringRef FName = Matches[1];
+ unsigned NumSamples, NumHeadSamples, NumSampledLines;
+ Matches[2].getAsInteger(10, NumSamples);
+ Matches[3].getAsInteger(10, NumHeadSamples);
+ Matches[4].getAsInteger(10, NumSampledLines);
+ FunctionProfile &FProfile = Profiles[FName];
+ FProfile.TotalSamples += NumSamples;
+ FProfile.TotalHeadSamples += NumHeadSamples;
+ BodySampleMap &SampleMap = FProfile.BodySamples;
+ unsigned I;
+ for (I = 0; I < NumSampledLines && !Loader.atEOF(); I++) {
+ Line = Loader.readLine();
+ if (!LineSample.match(Line, &Matches))
+ Loader.reportParseError("Expected 'NUM: NUM', found " + Line);
+ assert(Matches.size() == 3);
+ unsigned LineOffset, NumSamples;
+ Matches[1].getAsInteger(10, LineOffset);
+ Matches[2].getAsInteger(10, NumSamples);
+ SampleMap[LineOffset] += NumSamples;
+ }
+
+ if (I < NumSampledLines)
+ Loader.reportParseError("Unexpected end of file");
+ }
+}
+
+/// \brief Get the weight for an instruction.
+///
+/// The "weight" of an instruction \p Inst is the number of samples
+/// collected on that instruction at runtime. To retrieve it, we
+/// need to compute the line number of \p Inst relative to the start of its
+/// function. We use \p FirstLineno to compute the offset. We then
+/// look up the samples collected for \p Inst using \p BodySamples.
+///
+/// \param Inst Instruction to query.
+/// \param FirstLineno Line number of the first instruction in the function.
+/// \param BodySamples Map of relative source line locations to samples.
+///
+/// \returns The profiled weight of \p Inst.
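+///
+/// For example (illustrative numbers): an instruction on source line 42 in
+/// a function whose first instruction is on line 40 maps to the offset
+/// 42 - 40 + 1 = 3, so its weight is whatever BodySamples holds for key 3.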
+uint32_t SampleProfile::getInstWeight(Instruction &Inst, unsigned FirstLineno,
+ BodySampleMap &BodySamples) {
+ unsigned LOffset = Inst.getDebugLoc().getLine() - FirstLineno + 1;
+ return BodySamples.lookup(LOffset);
+}
+
+/// \brief Compute the weight of a basic block.
+///
+/// The weight of basic block \p B is the maximum weight of all the
+/// instructions in B.
+///
+/// \param B The basic block to query.
+/// \param FirstLineno The line number for the first line in the
+/// function holding B.
+/// \param BodySamples The map containing all the samples collected in that
+/// function.
+///
+/// \returns The computed weight of \p B.
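+///
+/// For example, a block whose instructions carry weights {0, 120, 7} gets
+/// weight 120; the result is cached in BlockWeights so repeated queries
+/// for the same block are answered without rescanning it.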
+uint32_t SampleProfile::computeBlockWeight(BasicBlock *B, unsigned FirstLineno,
+ BodySampleMap &BodySamples) {
+ // If we've computed B's weight before, return it.
+ Function *F = B->getParent();
+ FunctionProfile &FProfile = Profiles[F->getName()];
+ std::pair<BlockWeightMap::iterator, bool> Entry =
+ FProfile.BlockWeights.insert(std::make_pair(B, 0));
+ if (!Entry.second)
+ return Entry.first->second;
+
+ // Otherwise, compute and cache B's weight.
+ uint32_t Weight = 0;
+ for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) {
+ uint32_t InstWeight = getInstWeight(*I, FirstLineno, BodySamples);
+ if (InstWeight > Weight)
+ Weight = InstWeight;
+ }
+ Entry.first->second = Weight;
+ return Weight;
+}
+
+/// \brief Generate branch weight metadata for all branches in \p F.
+///
+/// For every branch instruction B in \p F, we compute the weight of the
+/// target block for each of the edges out of B. This is the weight
+/// that we associate with that branch.
+///
+/// TODO - This weight assignment will most likely be wrong if the
+/// target branch has more than two predecessors. This needs to be done
+/// using some form of flow propagation.
+///
+/// Once all the branch weights are computed, we emit the MD_prof
+/// metadata on B using the computed values.
+///
+/// \param F The function to query.
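+///
+/// For example (illustrative): a two-way branch whose first successor has
+/// computed weight 80 and whose second has weight 20 is annotated with
+/// MD_prof branch weights {80, 20}.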
+bool SampleProfile::emitAnnotations(Function &F) {
+ bool Changed = false;
+ FunctionProfile &FProfile = Profiles[F.getName()];
+ unsigned FirstLineno = inst_begin(F)->getDebugLoc().getLine();
+ MDBuilder MDB(F.getContext());
+
+ // Clear the block weights cache.
+ FProfile.BlockWeights.clear();
+
+ // When we find a branch instruction: For each edge E out of the branch,
+ // the weight of E is the weight of the target block.
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *B = I;
+ TerminatorInst *TI = B->getTerminator();
+ if (TI->getNumSuccessors() == 1)
+ continue;
+ if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
+ continue;
+
+ SmallVector<uint32_t, 4> Weights;
+ unsigned NSuccs = TI->getNumSuccessors();
+ for (unsigned I = 0; I < NSuccs; ++I) {
+ BasicBlock *Succ = TI->getSuccessor(I);
+ uint32_t Weight =
+ computeBlockWeight(Succ, FirstLineno, FProfile.BodySamples);
+ Weights.push_back(Weight);
+ }
+
+ TI->setMetadata(llvm::LLVMContext::MD_prof,
+ MDB.createBranchWeights(Weights));
+ Changed = true;
+ }
+
+ return Changed;
+}
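
Assuming the usual MDBuilder behavior, each annotated terminator ends up with "branch_weights" profile metadata. An isolated sketch of the two-successor case (helper name hypothetical):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"
    using namespace llvm;

    // Produces IR along the lines of:  br i1 %c, ..., !prof !N
    // where !N = metadata !{metadata !"branch_weights", i32 T, i32 F}.
    void annotateBranch(BranchInst *BI, uint32_t TrueW, uint32_t FalseW) {
      MDBuilder MDB(BI->getContext());
      BI->setMetadata(LLVMContext::MD_prof,
                      MDB.createBranchWeights(TrueW, FalseW));
    }
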
+
+char SampleProfileLoader::ID = 0;
+INITIALIZE_PASS(SampleProfileLoader, "sample-profile", "Sample Profile loader",
+ false, false)
+
+bool SampleProfileLoader::runOnFunction(Function &F) {
+ return Profiler->emitAnnotations(F);
+}
+
+bool SampleProfileLoader::doInitialization(Module &M) {
+ Profiler.reset(new SampleProfile(Filename));
+ Profiler->loadText();
+ return true;
+}
+
+FunctionPass *llvm::createSampleProfileLoaderPass() {
+ return new SampleProfileLoader(SampleProfileFile);
+}
+
+FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) {
+ return new SampleProfileLoader(Name);
+}
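
A hedged usage example for the new factory functions, wiring the loader into a legacy FunctionPassManager; the profile file name is a placeholder:

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Scalar.h"
    using namespace llvm;

    void addSampleProfileLoader(FunctionPassManager &FPM) {
      // Uses the StringRef overload declared above; "profile.txt" is a
      // stand-in for an actual text-format sample profile.
      FPM.add(createSampleProfileLoaderPass("profile.txt"));
    }
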
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index 8a9c7da..857597e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCEPass(Registry);
- initializeBlockPlacementPass(Registry);
+ initializeSampleProfileLoaderPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeConstantPropagationPass(Registry);
initializeCorrelatedValuePropagationPass(Registry);
@@ -44,12 +44,14 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopInstSimplifyPass(Registry);
initializeLoopRotatePass(Registry);
initializeLoopStrengthReducePass(Registry);
+ initializeLoopRerollPass(Registry);
initializeLoopUnrollPass(Registry);
initializeLoopUnswitchPass(Registry);
initializeLoopIdiomRecognizePass(Registry);
initializeLowerAtomicPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
+ initializePartiallyInlineLibCallsPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);
initializeSCCPPass(Registry);
@@ -58,7 +60,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSROA_DTPass(Registry);
initializeSROA_SSAUpPass(Registry);
initializeCFGSimplifyPassPass(Registry);
- initializeSimplifyLibCallsPass(Registry);
+ initializeStructurizeCFGPass(Registry);
initializeSinkingPass(Registry);
initializeTailCallElimPass(Registry);
}
@@ -111,6 +113,10 @@ void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopRotatePass());
}
+void LLVMAddLoopRerollPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopRerollPass());
+}
+
void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopUnrollPass());
}
@@ -123,6 +129,10 @@ void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createMemCpyOptPass());
}
+void LLVMAddPartiallyInlineLibCallsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPartiallyInlineLibCallsPass());
+}
+
void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createPromoteMemoryToRegisterPass());
}
@@ -149,7 +159,7 @@ void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
}
void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createSimplifyLibCallsPass());
+ // NOTE: The simplify-libcalls pass has been removed.
}
void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index bfde334..57b290e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -166,21 +166,21 @@ namespace {
void DeleteDeadInstructions();
void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
- SmallVector<AllocaInst*, 32> &NewElts);
+ SmallVectorImpl<AllocaInst *> &NewElts);
void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
- SmallVector<AllocaInst*, 32> &NewElts);
+ SmallVectorImpl<AllocaInst *> &NewElts);
void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
- SmallVector<AllocaInst*, 32> &NewElts);
+ SmallVectorImpl<AllocaInst *> &NewElts);
void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
uint64_t Offset,
- SmallVector<AllocaInst*, 32> &NewElts);
+ SmallVectorImpl<AllocaInst *> &NewElts);
void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
AllocaInst *AI,
- SmallVector<AllocaInst*, 32> &NewElts);
+ SmallVectorImpl<AllocaInst *> &NewElts);
void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
- SmallVector<AllocaInst*, 32> &NewElts);
+ SmallVectorImpl<AllocaInst *> &NewElts);
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
- SmallVector<AllocaInst*, 32> &NewElts);
+ SmallVectorImpl<AllocaInst *> &NewElts);
bool ShouldAttemptScalarRepl(AllocaInst *AI);
};
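
These signature changes follow a general LLVM idiom: taking SmallVectorImpl<T>& lets callers pass a SmallVector of any inline capacity without encoding that capacity in the callee's type. A minimal sketch (hypothetical functions):

    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // The callee does not care about the caller's inline capacity.
    static void collectEvens(SmallVectorImpl<int> &Out) {
      for (int I = 0; I < 10; ++I)
        if ((I & 1) == 0)
          Out.push_back(I);
    }

    void caller() {
      SmallVector<int, 4> A;   // different inline sizes...
      SmallVector<int, 32> B;
      collectEvens(A);         // ...both bind to SmallVectorImpl<int>&
      collectEvens(B);
    }
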
@@ -963,7 +963,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
else if (SV->getType()->isPointerTy())
- SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()));
+ SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getType()));
// Zero extend or truncate the value if needed.
if (SV->getType() != AllocaType) {
@@ -1066,12 +1066,12 @@ public:
LoadAndStorePromoter::run(Insts);
AI->eraseFromParent();
- for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
+ for (SmallVectorImpl<DbgDeclareInst *>::iterator I = DDIs.begin(),
E = DDIs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
DDI->eraseFromParent();
}
- for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
+ for (SmallVectorImpl<DbgValueInst *>::iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
DVI->eraseFromParent();
@@ -1086,7 +1086,7 @@ public:
}
virtual void updateDebugInfo(Instruction *Inst) const {
- for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
+ for (SmallVectorImpl<DbgDeclareInst *>::const_iterator I = DDIs.begin(),
E = DDIs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
@@ -1094,7 +1094,7 @@ public:
else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
}
- for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
+ for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
Value *Arg = NULL;
@@ -1865,7 +1865,7 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
/// Offset indicates the position within AI that is referenced by this
/// instruction.
void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
- SmallVector<AllocaInst*, 32> &NewElts) {
+ SmallVectorImpl<AllocaInst *> &NewElts) {
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
Use &TheUse = UI.getUse();
Instruction *User = cast<Instruction>(*UI++);
@@ -1979,7 +1979,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
/// and recursively continue updating all of its uses.
void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
- SmallVector<AllocaInst*, 32> &NewElts) {
+ SmallVectorImpl<AllocaInst *> &NewElts) {
RewriteForScalarRepl(BC, AI, Offset, NewElts);
if (BC->getOperand(0) != AI)
return;
@@ -2037,7 +2037,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
/// elements of the alloca that are being split apart, and if so, rewrite
/// the GEP to be relative to the new element.
void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
- SmallVector<AllocaInst*, 32> &NewElts) {
+ SmallVectorImpl<AllocaInst *> &NewElts) {
uint64_t OldOffset = Offset;
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
// If the GEP was dynamic then it must have been a dynamic vector lookup.
@@ -2099,7 +2099,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
/// to mark the lifetime of the scalarized memory.
void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
uint64_t Offset,
- SmallVector<AllocaInst*, 32> &NewElts) {
+ SmallVectorImpl<AllocaInst *> &NewElts) {
ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0));
// Put matching lifetime markers on everything from Offset up to
// Offset+OldSize.
@@ -2153,9 +2153,10 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
-void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
- AllocaInst *AI,
- SmallVector<AllocaInst*, 32> &NewElts) {
+void
+SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
+ AllocaInst *AI,
+ SmallVectorImpl<AllocaInst *> &NewElts) {
// If this is a memcpy/memmove, construct the other pointer as the
// appropriate type. The "Other" pointer is the pointer that goes to memory
// that doesn't have anything to do with the alloca that we are promoting. For
@@ -2189,7 +2190,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
if (OtherPtr == AI || OtherPtr == NewElts[0]) {
// This code will run twice for a no-op memcpy -- once for each operand.
// Put only one reference to MI on the DeadInsts list.
- for (SmallVector<Value*, 32>::const_iterator I = DeadInsts.begin(),
+ for (SmallVectorImpl<Value *>::const_iterator I = DeadInsts.begin(),
E = DeadInsts.end(); I != E; ++I)
if (*I == MI) return;
DeadInsts.push_back(MI);
@@ -2326,8 +2327,9 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
/// overwrites the entire allocation. Extract out the pieces of the stored
/// integer and store them individually.
-void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
- SmallVector<AllocaInst*, 32> &NewElts){
+void
+SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
+ SmallVectorImpl<AllocaInst *> &NewElts) {
// Extract each element out of the integer according to its structure offset
// and store the element value to the individual alloca.
Value *SrcVal = SI->getOperand(0);
@@ -2440,8 +2442,9 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
/// an integer. Load the individual pieces to form the aggregate value.
-void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
- SmallVector<AllocaInst*, 32> &NewElts) {
+void
+SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
+ SmallVectorImpl<AllocaInst *> &NewElts) {
// Extract each element out of the NewElts according to its structure offset
// and form the result value.
Type *AllocaEltTy = AI->getAllocatedType();
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index c243d34..8371f6d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -41,187 +41,31 @@ using namespace llvm;
STATISTIC(NumSimpl, "Number of blocks simplified");
namespace {
- struct CFGSimplifyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- CFGSimplifyPass() : FunctionPass(ID) {
- initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
- }
-
- virtual bool runOnFunction(Function &F);
+struct CFGSimplifyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGSimplifyPass() : FunctionPass(ID) {
+ initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnFunction(Function &F);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetTransformInfo>();
- }
- };
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfo>();
+ }
+};
}
char CFGSimplifyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG",
- false, false)
+INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
+ false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG",
- false, false)
+INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
+ false)
// Public interface to the CFGSimplification pass
FunctionPass *llvm::createCFGSimplificationPass() {
return new CFGSimplifyPass();
}
-/// changeToUnreachable - Insert an unreachable instruction before the specified
-/// instruction, making it and the rest of the code in the block dead.
-static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
- BasicBlock *BB = I->getParent();
- // Loop over all of the successors, removing BB's entry from any PHI
- // nodes.
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- (*SI)->removePredecessor(BB);
-
- // Insert a call to llvm.trap right before this. This turns the undefined
- // behavior into a hard fail instead of falling through into random code.
- if (UseLLVMTrap) {
- Function *TrapFn =
- Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
- CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
- CallTrap->setDebugLoc(I->getDebugLoc());
- }
- new UnreachableInst(I->getContext(), I);
-
- // All instructions after this are dead.
- BasicBlock::iterator BBI = I, BBE = BB->end();
- while (BBI != BBE) {
- if (!BBI->use_empty())
- BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
- BB->getInstList().erase(BBI++);
- }
-}
-
-/// changeToCall - Convert the specified invoke into a normal call.
-static void changeToCall(InvokeInst *II) {
- SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
- CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
- NewCall->takeName(II);
- NewCall->setCallingConv(II->getCallingConv());
- NewCall->setAttributes(II->getAttributes());
- NewCall->setDebugLoc(II->getDebugLoc());
- II->replaceAllUsesWith(NewCall);
-
- // Follow the call by a branch to the normal destination.
- BranchInst::Create(II->getNormalDest(), II);
-
- // Update PHI nodes in the unwind destination
- II->getUnwindDest()->removePredecessor(II->getParent());
- II->eraseFromParent();
-}
-
-static bool markAliveBlocks(BasicBlock *BB,
- SmallPtrSet<BasicBlock*, 128> &Reachable) {
-
- SmallVector<BasicBlock*, 128> Worklist;
- Worklist.push_back(BB);
- Reachable.insert(BB);
- bool Changed = false;
- do {
- BB = Worklist.pop_back_val();
-
- // Do a quick scan of the basic block, turning any obviously unreachable
- // instructions into LLVM unreachable insts. The instruction combining pass
- // canonicalizes unreachable insts into stores to null or undef.
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
- if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
- if (CI->doesNotReturn()) {
- // If we found a call to a no-return function, insert an unreachable
- // instruction after it. Make sure there isn't *already* one there
- // though.
- ++BBI;
- if (!isa<UnreachableInst>(BBI)) {
- // Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(BBI, false);
- Changed = true;
- }
- break;
- }
- }
-
- // Store to undef and store to null are undefined and used to signal that
- // they should be changed to unreachable by passes that can't modify the
- // CFG.
- if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
- // Don't touch volatile stores.
- if (SI->isVolatile()) continue;
-
- Value *Ptr = SI->getOperand(1);
-
- if (isa<UndefValue>(Ptr) ||
- (isa<ConstantPointerNull>(Ptr) &&
- SI->getPointerAddressSpace() == 0)) {
- changeToUnreachable(SI, true);
- Changed = true;
- break;
- }
- }
- }
-
- // Turn invokes that call 'nounwind' functions into ordinary calls.
- if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- Value *Callee = II->getCalledValue();
- if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
- changeToUnreachable(II, true);
- Changed = true;
- } else if (II->doesNotThrow()) {
- if (II->use_empty() && II->onlyReadsMemory()) {
- // jump to the normal destination branch.
- BranchInst::Create(II->getNormalDest(), II);
- II->getUnwindDest()->removePredecessor(II->getParent());
- II->eraseFromParent();
- } else
- changeToCall(II);
- Changed = true;
- }
- }
-
- Changed |= ConstantFoldTerminator(BB, true);
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- if (Reachable.insert(*SI))
- Worklist.push_back(*SI);
- } while (!Worklist.empty());
- return Changed;
-}
-
-/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
-/// if they are in a dead cycle. Return true if a change was made, false
-/// otherwise.
-static bool removeUnreachableBlocksFromFn(Function &F) {
- SmallPtrSet<BasicBlock*, 128> Reachable;
- bool Changed = markAliveBlocks(F.begin(), Reachable);
-
- // If there are unreachable blocks in the CFG...
- if (Reachable.size() == F.size())
- return Changed;
-
- assert(Reachable.size() < F.size());
- NumSimpl += F.size()-Reachable.size();
-
- // Loop over all of the basic blocks that are not reachable, dropping all of
- // their internal references...
- for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
- if (Reachable.count(BB))
- continue;
-
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- if (Reachable.count(*SI))
- (*SI)->removePredecessor(BB);
- BB->dropAllReferences();
- }
-
- for (Function::iterator I = ++F.begin(); I != F.end();)
- if (!Reachable.count(I))
- I = F.getBasicBlockList().erase(I);
- else
- ++I;
-
- return true;
-}
-
/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
/// node) return blocks, merge them together to promote recursive block merging.
static bool mergeEmptyReturnBlocks(Function &F) {
@@ -326,7 +170,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
bool CFGSimplifyPass::runOnFunction(Function &F) {
const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
- bool EverChanged = removeUnreachableBlocksFromFn(F);
+ bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
EverChanged |= iterativelySimplifyCFG(F, TTI, TD);
@@ -334,16 +178,16 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
if (!EverChanged) return false;
// iterativelySimplifyCFG can (rarely) make some loops dead. If this happens,
- // removeUnreachableBlocksFromFn is needed to nuke them, which means we should
+ // removeUnreachableBlocks is needed to nuke them, which means we should
// iterate between the two optimizations. We structure the code like this to
// avoid reruning iterativelySimplifyCFG if the second pass of
- // removeUnreachableBlocksFromFn doesn't do anything.
- if (!removeUnreachableBlocksFromFn(F))
+ // removeUnreachableBlocks doesn't do anything.
+ if (!removeUnreachableBlocks(F))
return true;
do {
EverChanged = iterativelySimplifyCFG(F, TTI, TD);
- EverChanged |= removeUnreachableBlocksFromFn(F);
+ EverChanged |= removeUnreachableBlocks(F);
} while (EverChanged);
return true;
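
The deleted helpers above were folded into a shared utility, so callers reduce to a single call (sketch, assuming removeUnreachableBlocks is declared in this tree's Transforms/Utils/Local.h):

    #include "llvm/IR/Function.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    bool pruneDeadBlocks(Function &F) {
      // Removes blocks unreachable from the entry, including dead cycles;
      // returns true if the function was changed.
      return removeUnreachableBlocks(F);
    }
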
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
deleted file mode 100644
index 3514e6c..0000000
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ /dev/null
@@ -1,247 +0,0 @@
-//===- SimplifyLibCalls.cpp - Optimize specific well-known library calls --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a simple pass that applies a variety of small
-// optimizations for calls to specific well-known function calls (e.g. runtime
-// library functions). Any optimization that takes the very simple form
-// "replace call to library function with simpler code that provides the same
-// result" belongs in this file.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "simplify-libcalls"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host!
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
-using namespace llvm;
-
-
-//===----------------------------------------------------------------------===//
-// Optimizer Base Class
-//===----------------------------------------------------------------------===//
-
-/// This class is the abstract base class for the set of optimizations that
-/// corresponds to one library call.
-namespace {
-class LibCallOptimization {
-protected:
- Function *Caller;
- const DataLayout *TD;
- const TargetLibraryInfo *TLI;
- LLVMContext* Context;
-public:
- LibCallOptimization() { }
- virtual ~LibCallOptimization() {}
-
- /// CallOptimizer - This pure virtual method is implemented by base classes to
- /// do various optimizations. If this returns null then no transformation was
- /// performed. If it returns CI, then it transformed the call and CI is to be
- /// deleted. If it returns something else, replace CI with the new value and
- /// delete CI.
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
- =0;
-
- Value *OptimizeCall(CallInst *CI, const DataLayout *TD,
- const TargetLibraryInfo *TLI, IRBuilder<> &B) {
- Caller = CI->getParent()->getParent();
- this->TD = TD;
- this->TLI = TLI;
- if (CI->getCalledFunction())
- Context = &CI->getCalledFunction()->getContext();
-
- // We never change the calling convention.
- if (CI->getCallingConv() != llvm::CallingConv::C)
- return NULL;
-
- return CallOptimizer(CI->getCalledFunction(), CI, B);
- }
-};
-} // End anonymous namespace.
-
-
-//===----------------------------------------------------------------------===//
-// SimplifyLibCalls Pass Implementation
-//===----------------------------------------------------------------------===//
-
-namespace {
- /// This pass optimizes well known library functions from libc and libm.
- ///
- class SimplifyLibCalls : public FunctionPass {
- TargetLibraryInfo *TLI;
-
- StringMap<LibCallOptimization*> Optimizations;
- public:
- static char ID; // Pass identification
- SimplifyLibCalls() : FunctionPass(ID) {
- initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
- }
- void AddOpt(LibFunc::Func F, LibCallOptimization* Opt);
- void AddOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt);
-
- void InitOptimizations();
- bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetLibraryInfo>();
- }
- };
-} // end anonymous namespace.
-
-char SimplifyLibCalls::ID = 0;
-
-INITIALIZE_PASS_BEGIN(SimplifyLibCalls, "simplify-libcalls",
- "Simplify well-known library calls", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_END(SimplifyLibCalls, "simplify-libcalls",
- "Simplify well-known library calls", false, false)
-
-// Public interface to the Simplify LibCalls pass.
-FunctionPass *llvm::createSimplifyLibCallsPass() {
- return new SimplifyLibCalls();
-}
-
-void SimplifyLibCalls::AddOpt(LibFunc::Func F, LibCallOptimization* Opt) {
- if (TLI->has(F))
- Optimizations[TLI->getName(F)] = Opt;
-}
-
-void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
- LibCallOptimization* Opt) {
- if (TLI->has(F1) && TLI->has(F2))
- Optimizations[TLI->getName(F1)] = Opt;
-}
-
-/// Optimizations - Populate the Optimizations map with all the optimizations
-/// we know.
-void SimplifyLibCalls::InitOptimizations() {
-}
-
-
-/// runOnFunction - Top level algorithm.
-///
-bool SimplifyLibCalls::runOnFunction(Function &F) {
- TLI = &getAnalysis<TargetLibraryInfo>();
-
- if (Optimizations.empty())
- InitOptimizations();
-
- const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
-
- IRBuilder<> Builder(F.getContext());
-
- bool Changed = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
- // Ignore non-calls.
- CallInst *CI = dyn_cast<CallInst>(I++);
- if (!CI || CI->hasFnAttr(Attribute::NoBuiltin)) continue;
-
- // Ignore indirect calls and calls to non-external functions.
- Function *Callee = CI->getCalledFunction();
- if (Callee == 0 || !Callee->isDeclaration() ||
- !(Callee->hasExternalLinkage() || Callee->hasDLLImportLinkage()))
- continue;
-
- // Ignore unknown calls.
- LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
- if (!LCO) continue;
-
- // Set the builder to the instruction after the call.
- Builder.SetInsertPoint(BB, I);
-
- // Use debug location of CI for all new instructions.
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- // Try to optimize this call.
- Value *Result = LCO->OptimizeCall(CI, TD, TLI, Builder);
- if (Result == 0) continue;
-
- DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI;
- dbgs() << " into: " << *Result << "\n");
-
- // Something changed!
- Changed = true;
-
- // Inspect the instruction after the call (which was potentially just
- // added) next.
- I = CI; ++I;
-
- if (CI != Result && !CI->use_empty()) {
- CI->replaceAllUsesWith(Result);
- if (!Result->hasName())
- Result->takeName(CI);
- }
- CI->eraseFromParent();
- }
- }
- return Changed;
-}
-
-// TODO:
-// Additional cases that we need to add to this file:
-//
-// cbrt:
-// * cbrt(expN(X)) -> expN(x/3)
-// * cbrt(sqrt(x)) -> pow(x,1/6)
-// * cbrt(sqrt(x)) -> pow(x,1/9)
-//
-// exp, expf, expl:
-// * exp(log(x)) -> x
-//
-// log, logf, logl:
-// * log(exp(x)) -> x
-// * log(x**y) -> y*log(x)
-// * log(exp(y)) -> y*log(e)
-// * log(exp2(y)) -> y*log(2)
-// * log(exp10(y)) -> y*log(10)
-// * log(sqrt(x)) -> 0.5*log(x)
-// * log(pow(x,y)) -> y*log(x)
-//
-// lround, lroundf, lroundl:
-// * lround(cnst) -> cnst'
-//
-// pow, powf, powl:
-// * pow(exp(x),y) -> exp(x*y)
-// * pow(sqrt(x),y) -> pow(x,y*0.5)
-// * pow(pow(x,y),z)-> pow(x,y*z)
-//
-// round, roundf, roundl:
-// * round(cnst) -> cnst'
-//
-// signbit:
-// * signbit(cnst) -> cnst'
-// * signbit(nncst) -> 0 (if pstv is a non-negative constant)
-//
-// sqrt, sqrtf, sqrtl:
-// * sqrt(expN(x)) -> expN(x*0.5)
-// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
-// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
-//
-// strchr:
-// * strchr(p, 0) -> strlen(p)
-// tan, tanf, tanl:
-// * tan(atan(x)) -> x
-//
-// trunc, truncf, truncl:
-// * trunc(cnst) -> cnst'
-//
-//
diff --git a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
new file mode 100644
index 0000000..5045ff8f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -0,0 +1,906 @@
+//===-- StructurizeCFG.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "structurizecfg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+namespace {
+
+// Definition of the complex types used in this pass.
+
+typedef std::pair<BasicBlock *, Value *> BBValuePair;
+
+typedef SmallVector<RegionNode*, 8> RNVector;
+typedef SmallVector<BasicBlock*, 8> BBVector;
+typedef SmallVector<BranchInst*, 8> BranchVector;
+typedef SmallVector<BBValuePair, 2> BBValueVector;
+
+typedef SmallPtrSet<BasicBlock *, 8> BBSet;
+
+typedef MapVector<PHINode *, BBValueVector> PhiMap;
+typedef MapVector<BasicBlock *, BBVector> BB2BBVecMap;
+
+typedef DenseMap<DomTreeNode *, unsigned> DTN2UnsignedMap;
+typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
+typedef DenseMap<BasicBlock *, Value *> BBPredicates;
+typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
+typedef DenseMap<BasicBlock *, BasicBlock*> BB2BBMap;
+
+// The name for newly created blocks.
+
+static const char *const FlowBlockName = "Flow";
+
+/// @brief Find the nearest common dominator for multiple BasicBlocks
+///
+/// Helper class for StructurizeCFG
+/// TODO: Maybe move into common code
+class NearestCommonDominator {
+ DominatorTree *DT;
+
+ DTN2UnsignedMap IndexMap;
+
+ BasicBlock *Result;
+ unsigned ResultIndex;
+ bool ExplicitMentioned;
+
+public:
+ /// \brief Start a new query
+ NearestCommonDominator(DominatorTree *DomTree) {
+ DT = DomTree;
+ Result = 0;
+ }
+
+ /// \brief Add BB to the resulting dominator
+ void addBlock(BasicBlock *BB, bool Remember = true) {
+ DomTreeNode *Node = DT->getNode(BB);
+
+ if (Result == 0) {
+ unsigned Numbering = 0;
+ for (;Node;Node = Node->getIDom())
+ IndexMap[Node] = ++Numbering;
+ Result = BB;
+ ResultIndex = 1;
+ ExplicitMentioned = Remember;
+ return;
+ }
+
+ for (;Node;Node = Node->getIDom())
+ if (IndexMap.count(Node))
+ break;
+ else
+ IndexMap[Node] = 0;
+
+ assert(Node && "Dominator tree invalid!");
+
+ unsigned Numbering = IndexMap[Node];
+ if (Numbering > ResultIndex) {
+ Result = Node->getBlock();
+ ResultIndex = Numbering;
+ ExplicitMentioned = Remember && (Result == BB);
+ } else if (Numbering == ResultIndex) {
+ ExplicitMentioned |= Remember;
+ }
+ }
+
+ /// \brief Is "Result" one of the BBs added with "Remember" = True?
+ bool wasResultExplicitMentioned() {
+ return ExplicitMentioned;
+ }
+
+ /// \brief Get the query result
+ BasicBlock *getResult() {
+ return Result;
+ }
+};
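
Usage sketch for the helper class above (illustrative only; DT must be a valid DominatorTree for the blocks' function):

    // One query can fold in any number of blocks; blocks added with
    // Remember=false contribute to the result without being "mentioned".
    BasicBlock *commonDom(DominatorTree *DT, BasicBlock *A, BasicBlock *B,
                          BasicBlock *C) {
      NearestCommonDominator Dominator(DT);
      Dominator.addBlock(A);
      Dominator.addBlock(B);
      Dominator.addBlock(C, /*Remember=*/false);
      return Dominator.getResult();
    }
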
+
+/// @brief Transforms the control flow graph one single-entry, single-exit
+/// region at a time.
+///
+/// After the transform all "If"/"Then"/"Else" style control flow looks like
+/// this:
+///
+/// \verbatim
+/// 1
+/// ||
+/// | |
+/// 2 |
+/// | /
+/// |/
+/// 3
+/// || Where:
+/// | | 1 = "If" block, calculates the condition
+/// 4 | 2 = "Then" subregion, runs if the condition is true
+/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
+/// |/ 4 = "Else" optional subregion, runs if the condition is false
+/// 5 5 = "End" block, also rejoins the control flow
+/// \endverbatim
+///
+/// Control flow is expressed as a branch where the true exit goes into the
+/// "Then"/"Else" region, while the false exit skips the region
+/// The condition for the optional "Else" region is expressed as a PHI node.
+/// The incomming values of the PHI node are true for the "If" edge and false
+/// for the "Then" edge.
+///
+/// In addition, even complicated loops look like this:
+///
+/// \verbatim
+/// 1
+/// ||
+/// | |
+/// 2 ^ Where:
+/// | / 1 = "Entry" block
+/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
+/// 3 3 = "Flow" block, with back edge to entry block
+/// |
+/// \endverbatim
+///
+/// The back edge of the "Flow" block is always on the false side of the branch
+/// while the true side continues the general flow. So the loop condition
+/// consists of a network of PHI nodes where the true incoming values express
+/// breaks and the false values express continue states.
+class StructurizeCFG : public RegionPass {
+ Type *Boolean;
+ ConstantInt *BoolTrue;
+ ConstantInt *BoolFalse;
+ UndefValue *BoolUndef;
+
+ Function *Func;
+ Region *ParentRegion;
+
+ DominatorTree *DT;
+
+ RNVector Order;
+ BBSet Visited;
+
+ BBPhiMap DeletedPhis;
+ BB2BBVecMap AddedPhis;
+
+ PredMap Predicates;
+ BranchVector Conditions;
+
+ BB2BBMap Loops;
+ PredMap LoopPreds;
+ BranchVector LoopConds;
+
+ RegionNode *PrevNode;
+
+ void orderNodes();
+
+ void analyzeLoops(RegionNode *N);
+
+ Value *invert(Value *Condition);
+
+ Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
+
+ void gatherPredicates(RegionNode *N);
+
+ void collectInfos();
+
+ void insertConditions(bool Loops);
+
+ void delPhiValues(BasicBlock *From, BasicBlock *To);
+
+ void addPhiValues(BasicBlock *From, BasicBlock *To);
+
+ void setPhiValues();
+
+ void killTerminator(BasicBlock *BB);
+
+ void changeExit(RegionNode *Node, BasicBlock *NewExit,
+ bool IncludeDominator);
+
+ BasicBlock *getNextFlow(BasicBlock *Dominator);
+
+ BasicBlock *needPrefix(bool NeedEmpty);
+
+ BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
+
+ void setPrevNode(BasicBlock *BB);
+
+ bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
+
+ bool isPredictableTrue(RegionNode *Node);
+
+ void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
+
+ void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
+
+ void createFlow();
+
+ void rebuildSSA();
+
+public:
+ static char ID;
+
+ StructurizeCFG() :
+ RegionPass(ID) {
+ initializeStructurizeCFGPass(*PassRegistry::getPassRegistry());
+ }
+
+ using Pass::doInitialization;
+ virtual bool doInitialization(Region *R, RGPassManager &RGM);
+
+ virtual bool runOnRegion(Region *R, RGPassManager &RGM);
+
+ virtual const char *getPassName() const {
+ return "Structurize control flow";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(LowerSwitchID);
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ RegionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // end anonymous namespace
+
+char StructurizeCFG::ID = 0;
+
+INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(RegionInfo)
+INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG",
+ false, false)
+
+/// \brief Initialize the types and constants used in the pass
+bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
+ LLVMContext &Context = R->getEntry()->getContext();
+
+ Boolean = Type::getInt1Ty(Context);
+ BoolTrue = ConstantInt::getTrue(Context);
+ BoolFalse = ConstantInt::getFalse(Context);
+ BoolUndef = UndefValue::get(Boolean);
+
+ return false;
+}
+
+/// \brief Build up the general order of nodes
+void StructurizeCFG::orderNodes() {
+ scc_iterator<Region *> I = scc_begin(ParentRegion),
+ E = scc_end(ParentRegion);
+ for (Order.clear(); I != E; ++I) {
+ std::vector<RegionNode *> &Nodes = *I;
+ Order.append(Nodes.begin(), Nodes.end());
+ }
+}
+
+/// \brief Determine the end of the loops
+void StructurizeCFG::analyzeLoops(RegionNode *N) {
+ if (N->isSubRegion()) {
+ // Test for exit as back edge
+ BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
+ if (Visited.count(Exit))
+ Loops[Exit] = N->getEntry();
+
+ } else {
+ // Test for successors as back edge
+ BasicBlock *BB = N->getNodeAs<BasicBlock>();
+ BranchInst *Term = cast<BranchInst>(BB->getTerminator());
+
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = Term->getSuccessor(i);
+
+ if (Visited.count(Succ))
+ Loops[Succ] = BB;
+ }
+ }
+}
+
+/// \brief Invert the given condition
+Value *StructurizeCFG::invert(Value *Condition) {
+ // First: Check if it's a constant
+ if (Condition == BoolTrue)
+ return BoolFalse;
+
+ if (Condition == BoolFalse)
+ return BoolTrue;
+
+ if (Condition == BoolUndef)
+ return BoolUndef;
+
+ // Second: If the condition is already inverted, return the original value
+ if (match(Condition, m_Not(m_Value(Condition))))
+ return Condition;
+
+ if (Instruction *Inst = dyn_cast<Instruction>(Condition)) {
+ // Third: Check all the users for an invert
+ BasicBlock *Parent = Inst->getParent();
+ for (Value::use_iterator I = Condition->use_begin(),
+ E = Condition->use_end(); I != E; ++I) {
+
+ Instruction *User = dyn_cast<Instruction>(*I);
+ if (!User || User->getParent() != Parent)
+ continue;
+
+ if (match(*I, m_Not(m_Specific(Condition))))
+ return *I;
+ }
+
+ // Last option: Create a new instruction
+ return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
+ }
+
+ if (Argument *Arg = dyn_cast<Argument>(Condition)) {
+ BasicBlock &EntryBlock = Arg->getParent()->getEntryBlock();
+ return BinaryOperator::CreateNot(Condition,
+ Arg->getName() + ".inv",
+ EntryBlock.getTerminator());
+ }
+
+ llvm_unreachable("Unhandled condition to invert");
+}
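
For reference, the m_Not pattern used in the second step, in isolation (hypothetical helper; m_Not matches an 'xor X, true' for i1 values):

    #include "llvm/Support/PatternMatch.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Returns the uninverted value if Cond is a logical not, else null.
    static Value *getUninverted(Value *Cond) {
      Value *X = 0;
      if (match(Cond, m_Not(m_Value(X))))
        return X;
      return 0;
    }
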
+
+/// \brief Build the condition for one edge
+Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
+ bool Invert) {
+ Value *Cond = Invert ? BoolFalse : BoolTrue;
+ if (Term->isConditional()) {
+ Cond = Term->getCondition();
+
+ if (Idx != (unsigned)Invert)
+ Cond = invert(Cond);
+ }
+ return Cond;
+}
+
+/// \brief Analyze the predecessors of each block and build up predicates
+void StructurizeCFG::gatherPredicates(RegionNode *N) {
+ RegionInfo *RI = ParentRegion->getRegionInfo();
+ BasicBlock *BB = N->getEntry();
+ BBPredicates &Pred = Predicates[BB];
+ BBPredicates &LPred = LoopPreds[BB];
+
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ PI != PE; ++PI) {
+
+ // Ignore it if it's a branch from outside into our region entry
+ if (!ParentRegion->contains(*PI))
+ continue;
+
+ Region *R = RI->getRegionFor(*PI);
+ if (R == ParentRegion) {
+
+ // It's a top level block in our region
+ BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = Term->getSuccessor(i);
+ if (Succ != BB)
+ continue;
+
+ if (Visited.count(*PI)) {
+ // Normal forward edge
+ if (Term->isConditional()) {
+ // Try to treat it like an ELSE block
+ BasicBlock *Other = Term->getSuccessor(!i);
+ if (Visited.count(Other) && !Loops.count(Other) &&
+ !Pred.count(Other) && !Pred.count(*PI)) {
+
+ Pred[Other] = BoolFalse;
+ Pred[*PI] = BoolTrue;
+ continue;
+ }
+ }
+ Pred[*PI] = buildCondition(Term, i, false);
+
+ } else {
+ // Back edge
+ LPred[*PI] = buildCondition(Term, i, true);
+ }
+ }
+
+ } else {
+
+ // It's an exit from a sub region
+ while (R->getParent() != ParentRegion)
+ R = R->getParent();
+
+ // Edge from inside a subregion to its entry, ignore it
+ if (R == N)
+ continue;
+
+ BasicBlock *Entry = R->getEntry();
+ if (Visited.count(Entry))
+ Pred[Entry] = BoolTrue;
+ else
+ LPred[Entry] = BoolFalse;
+ }
+ }
+}
+
+/// \brief Collect various loop and predicate infos
+void StructurizeCFG::collectInfos() {
+ // Reset predicate
+ Predicates.clear();
+
+ // and loop infos
+ Loops.clear();
+ LoopPreds.clear();
+
+ // Reset the visited nodes
+ Visited.clear();
+
+ for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
+ OI != OE; ++OI) {
+
+ // Analyze all the conditions leading to a node
+ gatherPredicates(*OI);
+
+ // Remember that we've seen this node
+ Visited.insert((*OI)->getEntry());
+
+ // Find the last back edges
+ analyzeLoops(*OI);
+ }
+}
+
+/// \brief Insert the missing branch conditions
+void StructurizeCFG::insertConditions(bool Loops) {
+ BranchVector &Conds = Loops ? LoopConds : Conditions;
+ Value *Default = Loops ? BoolTrue : BoolFalse;
+ SSAUpdater PhiInserter;
+
+ for (BranchVector::iterator I = Conds.begin(),
+ E = Conds.end(); I != E; ++I) {
+
+ BranchInst *Term = *I;
+ assert(Term->isConditional());
+
+ BasicBlock *Parent = Term->getParent();
+ BasicBlock *SuccTrue = Term->getSuccessor(0);
+ BasicBlock *SuccFalse = Term->getSuccessor(1);
+
+ PhiInserter.Initialize(Boolean, "");
+ PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
+ PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
+
+ BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
+
+ NearestCommonDominator Dominator(DT);
+ Dominator.addBlock(Parent, false);
+
+ Value *ParentValue = 0;
+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+
+ if (PI->first == Parent) {
+ ParentValue = PI->second;
+ break;
+ }
+ PhiInserter.AddAvailableValue(PI->first, PI->second);
+ Dominator.addBlock(PI->first);
+ }
+
+ if (ParentValue) {
+ Term->setCondition(ParentValue);
+ } else {
+ if (!Dominator.wasResultExplicitMentioned())
+ PhiInserter.AddAvailableValue(Dominator.getResult(), Default);
+
+ Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
+ }
+ }
+}
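
The SSAUpdater dance above, reduced to its core pattern (sketch; blocks and values assumed to belong to one function):

    #include "llvm/Transforms/Utils/SSAUpdater.h"
    using namespace llvm;

    Value *mergeAcrossBlocks(Type *Ty, BasicBlock *Entry, Value *Default,
                             BasicBlock *Pred, Value *PredVal,
                             BasicBlock *UseBB) {
      SSAUpdater Updater;
      Updater.Initialize(Ty, "merged");
      Updater.AddAvailableValue(Entry, Default); // fallback on other paths
      Updater.AddAvailableValue(Pred, PredVal);  // block-specific value
      // Inserts PHI nodes as needed so the value is live in UseBB.
      return Updater.GetValueInMiddleOfBlock(UseBB);
    }
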
+
+/// \brief Remove all PHI values coming from "From" into "To" and remember
+/// them in DeletedPhis
+void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
+ PhiMap &Map = DeletedPhis[To];
+ for (BasicBlock::iterator I = To->begin(), E = To->end();
+ I != E && isa<PHINode>(*I);) {
+
+ PHINode &Phi = cast<PHINode>(*I++);
+ while (Phi.getBasicBlockIndex(From) != -1) {
+ Value *Deleted = Phi.removeIncomingValue(From, false);
+ Map[&Phi].push_back(std::make_pair(From, Deleted));
+ }
+ }
+}
+
+/// \brief Add a dummy PHI value as soon as we know the new predecessor
+void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
+ for (BasicBlock::iterator I = To->begin(), E = To->end();
+ I != E && isa<PHINode>(*I);) {
+
+ PHINode &Phi = cast<PHINode>(*I++);
+ Value *Undef = UndefValue::get(Phi.getType());
+ Phi.addIncoming(Undef, From);
+ }
+ AddedPhis[To].push_back(From);
+}
+
+/// \brief Add the real PHI value as soon as everything is set up
+void StructurizeCFG::setPhiValues() {
+ SSAUpdater Updater;
+ for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end();
+ AI != AE; ++AI) {
+
+ BasicBlock *To = AI->first;
+ BBVector &From = AI->second;
+
+ if (!DeletedPhis.count(To))
+ continue;
+
+ PhiMap &Map = DeletedPhis[To];
+ for (PhiMap::iterator PI = Map.begin(), PE = Map.end();
+ PI != PE; ++PI) {
+
+ PHINode *Phi = PI->first;
+ Value *Undef = UndefValue::get(Phi->getType());
+ Updater.Initialize(Phi->getType(), "");
+ Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
+ Updater.AddAvailableValue(To, Undef);
+
+ NearestCommonDominator Dominator(DT);
+ Dominator.addBlock(To, false);
+ for (BBValueVector::iterator VI = PI->second.begin(),
+ VE = PI->second.end(); VI != VE; ++VI) {
+
+ Updater.AddAvailableValue(VI->first, VI->second);
+ Dominator.addBlock(VI->first);
+ }
+
+ if (!Dominator.wasResultExplicitMentioned())
+ Updater.AddAvailableValue(Dominator.getResult(), Undef);
+
+ for (BBVector::iterator FI = From.begin(), FE = From.end();
+ FI != FE; ++FI) {
+
+ int Idx = Phi->getBasicBlockIndex(*FI);
+ assert(Idx != -1);
+ Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI));
+ }
+ }
+
+ DeletedPhis.erase(To);
+ }
+ assert(DeletedPhis.empty());
+}
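
The two PHINode primitives these helpers build on, shown in isolation (hypothetical helper):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Detach the edge From->Phi, remembering its value, and reattach a
    // dummy undef so the PHI stays structurally valid.
    Value *swapEdgeForUndef(PHINode &Phi, BasicBlock *From) {
      Value *Old = Phi.removeIncomingValue(From, /*DeletePHIIfEmpty=*/false);
      Phi.addIncoming(UndefValue::get(Phi.getType()), From);
      return Old;
    }
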
+
+/// \brief Remove phi values from all successors and then remove the terminator.
+void StructurizeCFG::killTerminator(BasicBlock *BB) {
+ TerminatorInst *Term = BB->getTerminator();
+ if (!Term)
+ return;
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+
+ delPhiValues(BB, *SI);
+ }
+
+ Term->eraseFromParent();
+}
+
+/// \brief Let node exit(s) point to NewExit
+void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
+ bool IncludeDominator) {
+ if (Node->isSubRegion()) {
+ Region *SubRegion = Node->getNodeAs<Region>();
+ BasicBlock *OldExit = SubRegion->getExit();
+ BasicBlock *Dominator = 0;
+
+ // Find all the edges from the sub region to the exit
+ for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit);
+ I != E;) {
+
+ BasicBlock *BB = *I++;
+ if (!SubRegion->contains(BB))
+ continue;
+
+ // Modify the edges to point to the new exit
+ delPhiValues(BB, OldExit);
+ BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
+ addPhiValues(BB, NewExit);
+
+ // Find the new dominator (if requested)
+ if (IncludeDominator) {
+ if (!Dominator)
+ Dominator = BB;
+ else
+ Dominator = DT->findNearestCommonDominator(Dominator, BB);
+ }
+ }
+
+ // Change the dominator (if requested)
+ if (Dominator)
+ DT->changeImmediateDominator(NewExit, Dominator);
+
+ // Update the region info
+ SubRegion->replaceExit(NewExit);
+
+ } else {
+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+ killTerminator(BB);
+ BranchInst::Create(NewExit, BB);
+ addPhiValues(BB, NewExit);
+ if (IncludeDominator)
+ DT->changeImmediateDominator(NewExit, BB);
+ }
+}
+
+/// \brief Create a new flow node and update dominator tree and region info
+BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
+ LLVMContext &Context = Func->getContext();
+ BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
+ Order.back()->getEntry();
+ BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
+ Func, Insert);
+ DT->addNewBlock(Flow, Dominator);
+ ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
+ return Flow;
+}
+
+/// \brief Create a new flow node, or reuse the previous node as one
+BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
+ BasicBlock *Entry = PrevNode->getEntry();
+
+ if (!PrevNode->isSubRegion()) {
+ killTerminator(Entry);
+ if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
+ return Entry;
+
+ }
+
+ // create a new flow node
+ BasicBlock *Flow = getNextFlow(Entry);
+
+ // and wire it up
+ changeExit(PrevNode, Flow, true);
+ PrevNode = ParentRegion->getBBNode(Flow);
+ return Flow;
+}
+
+/// \brief Returns the region exit if possible, otherwise just a new flow node
+BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
+ bool ExitUseAllowed) {
+ if (Order.empty() && ExitUseAllowed) {
+ BasicBlock *Exit = ParentRegion->getExit();
+ DT->changeImmediateDominator(Exit, Flow);
+ addPhiValues(Flow, Exit);
+ return Exit;
+ }
+ return getNextFlow(Flow);
+}
+
+/// \brief Set the previous node
+void StructurizeCFG::setPrevNode(BasicBlock *BB) {
+ PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0;
+}
+
+/// \brief Does BB dominate all the predicates of Node?
+bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
+ BBPredicates &Preds = Predicates[Node->getEntry()];
+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+
+ if (!DT->dominates(BB, PI->first))
+ return false;
+ }
+ return true;
+}
+
+/// \brief Can we predict that this node will always be called?
+bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
+ BBPredicates &Preds = Predicates[Node->getEntry()];
+ bool Dominated = false;
+
+ // The region entry is always true
+ if (PrevNode == 0)
+ return true;
+
+ for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+
+ if (I->second != BoolTrue)
+ return false;
+
+ if (!Dominated && DT->dominates(I->first, PrevNode->getEntry()))
+ Dominated = true;
+ }
+
+ // TODO: The dominator check is too strict
+ return Dominated;
+}
+
+/// Take one node from the order vector and wire it up
+void StructurizeCFG::wireFlow(bool ExitUseAllowed,
+ BasicBlock *LoopEnd) {
+ RegionNode *Node = Order.pop_back_val();
+ Visited.insert(Node->getEntry());
+
+ if (isPredictableTrue(Node)) {
+ // Just a linear flow
+ if (PrevNode) {
+ changeExit(PrevNode, Node->getEntry(), true);
+ }
+ PrevNode = Node;
+
+ } else {
+ // Insert extra prefix node (or reuse last one)
+ BasicBlock *Flow = needPrefix(false);
+
+ // Insert extra postfix node (or use exit instead)
+ BasicBlock *Entry = Node->getEntry();
+ BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
+
+ // let it point to entry and next block
+ Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow));
+ addPhiValues(Flow, Entry);
+ DT->changeImmediateDominator(Entry, Flow);
+
+ PrevNode = Node;
+ while (!Order.empty() && !Visited.count(LoopEnd) &&
+ dominatesPredicates(Entry, Order.back())) {
+ handleLoops(false, LoopEnd);
+ }
+
+ changeExit(PrevNode, Next, false);
+ setPrevNode(Next);
+ }
+}
+
+void StructurizeCFG::handleLoops(bool ExitUseAllowed,
+ BasicBlock *LoopEnd) {
+ RegionNode *Node = Order.back();
+ BasicBlock *LoopStart = Node->getEntry();
+
+ if (!Loops.count(LoopStart)) {
+ wireFlow(ExitUseAllowed, LoopEnd);
+ return;
+ }
+
+ if (!isPredictableTrue(Node))
+ LoopStart = needPrefix(true);
+
+ LoopEnd = Loops[Node->getEntry()];
+ wireFlow(false, LoopEnd);
+ while (!Visited.count(LoopEnd)) {
+ handleLoops(false, LoopEnd);
+ }
+
+ // If the start of the loop is the entry block, we can't branch to it so
+ // insert a new dummy entry block.
+ Function *LoopFunc = LoopStart->getParent();
+ if (LoopStart == &LoopFunc->getEntryBlock()) {
+ LoopStart->setName("entry.orig");
+
+ BasicBlock *NewEntry =
+ BasicBlock::Create(LoopStart->getContext(),
+ "entry",
+ LoopFunc,
+ LoopStart);
+ BranchInst::Create(LoopStart, NewEntry);
+ }
+
+ // Create an extra loop end node
+ LoopEnd = needPrefix(false);
+ BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
+ LoopConds.push_back(BranchInst::Create(Next, LoopStart,
+ BoolUndef, LoopEnd));
+ addPhiValues(LoopEnd, LoopStart);
+ setPrevNode(Next);
+}
+
+/// After this function runs, control flow looks like it should, but
+/// branches and PHI nodes only have undefined conditions.
+void StructurizeCFG::createFlow() {
+ BasicBlock *Exit = ParentRegion->getExit();
+ bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
+
+ DeletedPhis.clear();
+ AddedPhis.clear();
+ Conditions.clear();
+ LoopConds.clear();
+
+ PrevNode = 0;
+ Visited.clear();
+
+ while (!Order.empty()) {
+ handleLoops(EntryDominatesExit, 0);
+ }
+
+ if (PrevNode)
+ changeExit(PrevNode, Exit, EntryDominatesExit);
+ else
+ assert(EntryDominatesExit);
+}
+
+/// Handle a rare case where instructions of the disintegrated nodes
+/// no longer dominate all their uses. Not sure if this is really necessary.
+void StructurizeCFG::rebuildSSA() {
+ SSAUpdater Updater;
+ for (Region::block_iterator I = ParentRegion->block_begin(),
+ E = ParentRegion->block_end();
+ I != E; ++I) {
+
+ BasicBlock *BB = *I;
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+
+ bool Initialized = false;
+ for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
+
+ Next = I->getNext();
+
+ Instruction *User = cast<Instruction>(I->getUser());
+ if (User->getParent() == BB) {
+ continue;
+
+ } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(*I) == BB)
+ continue;
+ }
+
+ if (DT->dominates(II, User))
+ continue;
+
+ if (!Initialized) {
+ Value *Undef = UndefValue::get(II->getType());
+ Updater.Initialize(II->getType(), "");
+ Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
+ Updater.AddAvailableValue(BB, II);
+ Initialized = true;
+ }
+ Updater.RewriteUseAfterInsertions(*I);
+ }
+ }
+ }
+}
+
+/// \brief Run the transformation for each region found
+bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
+ if (R->isTopLevelRegion())
+ return false;
+
+ Func = R->getEntry()->getParent();
+ ParentRegion = R;
+
+ DT = &getAnalysis<DominatorTree>();
+
+ orderNodes();
+ collectInfos();
+ createFlow();
+ insertConditions(false);
+ insertConditions(true);
+ setPhiValues();
+ rebuildSSA();
+
+ // Cleanup
+ Order.clear();
+ Visited.clear();
+ DeletedPhis.clear();
+ AddedPhis.clear();
+ Predicates.clear();
+ Conditions.clear();
+ Loops.clear();
+ LoopPreds.clear();
+ LoopConds.clear();
+
+ return true;
+}
+
+/// \brief Create the pass
+Pass *llvm::createStructurizeCFGPass() {
+ return new StructurizeCFG();
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 2002e68..9fb8ddc 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -53,6 +53,7 @@
#define DEBUG_TYPE "tailcallelim"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
@@ -69,6 +70,7 @@
#include "llvm/Support/CFG.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -97,16 +99,16 @@ namespace {
bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
- SmallVector<PHINode*, 8> &ArgumentPHIs,
+ SmallVectorImpl<PHINode *> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail);
bool FoldReturnAndProcessPred(BasicBlock *BB,
ReturnInst *Ret, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
- SmallVector<PHINode*, 8> &ArgumentPHIs,
+ SmallVectorImpl<PHINode *> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail);
bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
- SmallVector<PHINode*, 8> &ArgumentPHIs,
+ SmallVectorImpl<PHINode *> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail);
bool CanMoveAboveCall(Instruction *I, CallInst *CI);
Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
@@ -129,34 +131,44 @@ void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfo>();
}
-/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
-/// callees of this function. We only do very simple analysis right now, this
-/// could be expanded in the future to use mod/ref information for particular
-/// call sites if desired.
-static bool AllocaMightEscapeToCalls(AllocaInst *AI) {
- // FIXME: do simple 'address taken' analysis.
- return true;
+/// CanTRE - Return true if the specified alloca does not prevent tail
+/// recursion elimination: it must be in the entry block and must not be a
+/// variable-sized allocation.
+static bool CanTRE(AllocaInst *AI) {
+ // Because of PR962, we don't TRE allocas outside the entry block.
+
+ // If this alloca is in the body of the function, or if it is a variable
+ // sized allocation, we cannot tail call eliminate calls marked 'tail'
+ // with this mechanism.
+ BasicBlock *BB = AI->getParent();
+ return BB == &BB->getParent()->getEntryBlock() &&
+ isa<ConstantInt>(AI->getArraySize());
}
-/// CheckForEscapingAllocas - Scan the specified basic block for alloca
-/// instructions. If it contains any that might be accessed by calls, return
-/// true.
-static bool CheckForEscapingAllocas(BasicBlock *BB,
- bool &CannotTCETailMarkedCall) {
- bool RetVal = false;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- RetVal |= AllocaMightEscapeToCalls(AI);
-
- // If this alloca is in the body of the function, or if it is a variable
- // sized allocation, we cannot tail call eliminate calls marked 'tail'
- // with this mechanism.
- if (BB != &BB->getParent()->getEntryBlock() ||
- !isa<ConstantInt>(AI->getArraySize()))
- CannotTCETailMarkedCall = true;
- }
- return RetVal;
-}
+namespace {
+struct AllocaCaptureTracker : public CaptureTracker {
+ AllocaCaptureTracker() : Captured(false) {}
+
+ void tooManyUses() LLVM_OVERRIDE { Captured = true; }
+
+ bool shouldExplore(Use *U) LLVM_OVERRIDE {
+ Value *V = U->getUser();
+ if (isa<CallInst>(V) || isa<InvokeInst>(V))
+ UsesAlloca.insert(V);
+ return true;
+ }
+
+ bool captured(Use *U) LLVM_OVERRIDE {
+ if (isa<ReturnInst>(U->getUser()))
+ return false;
+ Captured = true;
+ return true;
+ }
+
+ bool Captured;
+ SmallPtrSet<const Value *, 16> UsesAlloca;
+};
+} // end anonymous namespace
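
The tracker plugs into the generic capture-analysis walk; schematically (sketch using the struct above):

    #include "llvm/Analysis/CaptureTracking.h"
    using namespace llvm;

    // Returns true if the alloca may escape. As a side effect the tracker
    // records every call or invoke that uses the alloca in UsesAlloca.
    bool allocaMayBeCaptured(AllocaInst *AI, AllocaCaptureTracker &ACT) {
      // Walks AI's uses, firing shouldExplore/captured/tooManyUses.
      PointerMayBeCaptured(AI, &ACT);
      return ACT.Captured;
    }
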
bool TailCallElim::runOnFunction(Function &F) {
// If this function is a varargs function, we won't be able to PHI the args
@@ -168,41 +180,44 @@ bool TailCallElim::runOnFunction(Function &F) {
bool TailCallsAreMarkedTail = false;
SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
- bool FunctionContainsEscapingAllocas = false;
- // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
+ // CanTRETailMarkedCall - If false, we cannot perform TRE on tail calls
// marked with the 'tail' attribute, because doing so would cause the stack
- // size to increase (real TCE would deallocate variable sized allocas, TCE
+ // size to increase (real TRE would deallocate variable-sized allocas, TRE
// doesn't).
- bool CannotTCETailMarkedCall = false;
-
- // Loop over the function, looking for any returning blocks, and keeping track
- // of whether this function has any non-trivially used allocas.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (FunctionContainsEscapingAllocas && CannotTCETailMarkedCall)
- break;
-
- FunctionContainsEscapingAllocas |=
- CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
+ bool CanTRETailMarkedCall = true;
+
+ // Find calls that can be marked tail.
+ AllocaCaptureTracker ACT;
+ for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ CanTRETailMarkedCall &= CanTRE(AI);
+ PointerMayBeCaptured(AI, &ACT);
+ // If any alloca is captured, give up on the whole function.
+ if (ACT.Captured)
+ return false;
+ }
+ }
}
- /// FIXME: The code generator produces really bad code when an 'escaping
- /// alloca' is changed from being a static alloca to being a dynamic alloca.
- /// Until this is resolved, disable this transformation if that would ever
- /// happen. This bug is PR962.
- if (FunctionContainsEscapingAllocas)
- return false;
-
- // Second pass, change any tail calls to loops.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
- bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs,CannotTCETailMarkedCall);
- if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
- Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
- TailCallsAreMarkedTail, ArgumentPHIs,
- CannotTCETailMarkedCall);
- MadeChange |= Change;
+ // Second pass, change any tail recursive calls to loops.
+ //
+ // FIXME: The code generator produces really bad code when an 'escaping
+ // alloca' is changed from being a static alloca to being a dynamic alloca.
+ // Until this is resolved, disable this transformation if that would ever
+ // happen. This bug is PR962.
+ if (ACT.UsesAlloca.empty()) {
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs, !CanTRETailMarkedCall);
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+ Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
+ TailCallsAreMarkedTail, ArgumentPHIs,
+ !CanTRETailMarkedCall);
+ MadeChange |= Change;
+ }
}
}
@@ -223,16 +238,24 @@ bool TailCallElim::runOnFunction(Function &F) {
}
}
- // Finally, if this function contains no non-escaping allocas, or calls
- // setjmp, mark all calls in the function as eligible for tail calls
- //(there is no stack memory for them to access).
- if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice())
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ // At this point, we know that the function has no captured allocas. If,
+ // in addition, the function does not call setjmp, mark every call that
+ // does not access stack memory with the 'tail' keyword. In particular, a
+ // call that is passed an alloca may not be marked, even if it does not
+ // capture it: the callee still reads or writes the caller's stack frame,
+ // which 'tail' would allow to be reclaimed.
+ if (!F.callsFunctionThatReturnsTwice()) {
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
if (CallInst *CI = dyn_cast<CallInst>(I)) {
- CI->setTailCall();
- MadeChange = true;
+ if (!ACT.UsesAlloca.count(CI)) {
+ CI->setTailCall();
+ MadeChange = true;
+ }
}
+ }
+ }
+ }
return MadeChange;
}
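Why calls that take an alloca are excluded even when nothing is captured (a hedged C-level sketch; `g` is hypothetical):

    void g(int *);                 // hypothetical callee; does not retain &x
    int f(void) {
      int x = 0;
      g(&x);                       // &x does not escape, but g still touches
                                   // this frame; marking the call 'tail'
                                   // would let the frame be reclaimed first
      return x;
    }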
@@ -424,7 +447,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
- SmallVector<PHINode*, 8> &ArgumentPHIs,
+ SmallVectorImpl<PHINode *> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail) {
// If we are introducing accumulator recursion to eliminate operations after
// the call instruction that are both associative and commutative, the initial
@@ -600,7 +623,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
ReturnInst *Ret, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
- SmallVector<PHINode*, 8> &ArgumentPHIs,
+ SmallVectorImpl<PHINode *> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail) {
bool Change = false;
@@ -634,10 +657,11 @@ bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
return Change;
}
-bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVector<PHINode*, 8> &ArgumentPHIs,
- bool CannotTailCallElimCallsMarkedTail) {
+bool
+TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVectorImpl<PHINode *> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
if (!CI)
return false;
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index ba99d2e..12de9ee 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -170,7 +171,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
if (DomTreeNode *DTN = DT->getNode(BB)) {
DomTreeNode *PredDTN = DT->getNode(PredBB);
SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
- for (SmallVector<DomTreeNode*, 8>::iterator DI = Children.begin(),
+ for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(),
DE = Children.end(); DI != DE; ++DI)
DT->changeImmediateDominator(*DI, PredDTN);
@@ -235,22 +236,6 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
}
-/// GetSuccessorNumber - Search for the specified successor of basic block BB
-/// and return its position in the terminator instruction's list of
-/// successors. It is an error to call this with a block that is not a
-/// successor.
-unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
- TerminatorInst *Term = BB->getTerminator();
-#ifndef NDEBUG
- unsigned e = Term->getNumSuccessors();
-#endif
- for (unsigned i = 0; ; ++i) {
- assert(i != e && "Didn't find edge?");
- if (Term->getSuccessor(i) == Succ)
- return i;
- }
-}
-
/// SplitEdge - Split the edge connecting specified block. Pass P must
/// not be NULL.
BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
@@ -263,7 +248,6 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
// If the edge isn't critical, then BB has a single successor or Succ has a
// single pred. Split the block.
- BasicBlock::iterator SplitPoint;
if (BasicBlock *SP = Succ->getSinglePredecessor()) {
// If the successor only has a single pred, split the top of the successor
// block.
@@ -416,8 +400,12 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
// If all incoming values for the new PHI would be the same, just don't
// make a new PHI. Instead, just remove the incoming values from the old
// PHI.
- for (unsigned i = 0, e = Preds.size(); i != e; ++i)
- PN->removeIncomingValue(Preds[i], false);
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ // Explicitly check the BB index here to handle duplicates in Preds.
+ int Idx = PN->getBasicBlockIndex(Preds[i]);
+ if (Idx >= 0)
+ PN->removeIncomingValue(Idx, false);
+ }
} else {
// If the values coming into the block are not the same, we need a PHI.
// Create the new PHI node, insert it into NewBB at the end of the block
@@ -598,52 +586,6 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
}
}
-/// FindFunctionBackedges - Analyze the specified function to find all of the
-/// loop backedges in the function and return them. This is a relatively cheap
-/// (compared to computing dominators and loop info) analysis.
-///
-/// The output is added to Result, as pairs of <from,to> edge info.
-void llvm::FindFunctionBackedges(const Function &F,
- SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
- const BasicBlock *BB = &F.getEntryBlock();
- if (succ_begin(BB) == succ_end(BB))
- return;
-
- SmallPtrSet<const BasicBlock*, 8> Visited;
- SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
- SmallPtrSet<const BasicBlock*, 8> InStack;
-
- Visited.insert(BB);
- VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
- InStack.insert(BB);
- do {
- std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
- const BasicBlock *ParentBB = Top.first;
- succ_const_iterator &I = Top.second;
-
- bool FoundNew = false;
- while (I != succ_end(ParentBB)) {
- BB = *I++;
- if (Visited.insert(BB)) {
- FoundNew = true;
- break;
- }
- // Successor is in VisitStack, it's a back edge.
- if (InStack.count(BB))
- Result.push_back(std::make_pair(ParentBB, BB));
- }
-
- if (FoundNew) {
- // Go down one level if there is a unvisited successor.
- InStack.insert(BB);
- VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
- } else {
- // Go up one level.
- InStack.erase(VisitStack.pop_back_val().first);
- }
- } while (!VisitStack.empty());
-}
-
/// FoldReturnIntoUncondBranch - This method duplicates the specified return
/// instruction into a predecessor which ends in an unconditional branch. If
/// the return instruction returns a value defined by a PHI, propagate the
@@ -726,3 +668,104 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp,
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
return CheckTerm;
}
+
+/// GetIfCondition - Given a basic block (BB) with two predecessors,
+/// check to see if the merge at this block is due
+/// to an "if condition". If so, return the boolean condition that determines
+/// which entry into BB will be taken. Also, return by reference the block
+/// that will be entered if the condition is true, and the block that will
+/// be entered if the condition is false.
+///
+/// This does no checking to see if the true/false blocks have large or unsavory
+/// instructions in them.
+Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
+ BasicBlock *&IfFalse) {
+ PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
+ BasicBlock *Pred1 = NULL;
+ BasicBlock *Pred2 = NULL;
+
+ if (SomePHI) {
+ if (SomePHI->getNumIncomingValues() != 2)
+ return NULL;
+ Pred1 = SomePHI->getIncomingBlock(0);
+ Pred2 = SomePHI->getIncomingBlock(1);
+ } else {
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (PI == PE) // No predecessor
+ return NULL;
+ Pred1 = *PI++;
+ if (PI == PE) // Only one predecessor
+ return NULL;
+ Pred2 = *PI++;
+ if (PI != PE) // More than two predecessors
+ return NULL;
+ }
+
+ // We can only handle branches. Other control flow will be lowered to
+ // branches if possible anyway.
+ BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
+ BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
+ if (Pred1Br == 0 || Pred2Br == 0)
+ return 0;
+
+ // Eliminate code duplication by ensuring that Pred1Br is conditional if
+ // either are.
+ if (Pred2Br->isConditional()) {
+ // If both branches are conditional, we don't have an "if statement". In
+ // reality, we could transform this case, but since the condition will be
+ // required anyway, we stand no chance of eliminating it, so the xform is
+ // probably not profitable.
+ if (Pred1Br->isConditional())
+ return 0;
+
+ std::swap(Pred1, Pred2);
+ std::swap(Pred1Br, Pred2Br);
+ }
+
+ if (Pred1Br->isConditional()) {
+ // The only thing we have to watch out for here is to make sure that Pred2
+ // doesn't have incoming edges from other blocks. If it does, the condition
+ // doesn't dominate BB.
+ if (Pred2->getSinglePredecessor() == 0)
+ return 0;
+
+ // If we found a conditional branch predecessor, make sure that it branches
+ // to BB and Pred2Br. If it doesn't, this isn't an "if statement".
+ if (Pred1Br->getSuccessor(0) == BB &&
+ Pred1Br->getSuccessor(1) == Pred2) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else if (Pred1Br->getSuccessor(0) == Pred2 &&
+ Pred1Br->getSuccessor(1) == BB) {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ } else {
+ // We know that one arm of the conditional goes to BB, so the other must
+ // go somewhere unrelated, and this must not be an "if statement".
+ return 0;
+ }
+
+ return Pred1Br->getCondition();
+ }
+
+ // Ok, if we got here, both predecessors end with an unconditional branch to
+ // BB. Don't panic! If both blocks only have a single (identical)
+ // predecessor, and THAT is a conditional branch, then we're all ok!
+ BasicBlock *CommonPred = Pred1->getSinglePredecessor();
+ if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
+ return 0;
+
+ // Otherwise, if this is a conditional branch, then we can use it!
+ BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
+ if (BI == 0) return 0;
+
+ assert(BI->isConditional() && "Two successors but not conditional?");
+ if (BI->getSuccessor(0) == Pred1) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ }
+ return BI->getCondition();
+}
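A minimal caller sketch (assuming `MergeBB` is some block with exactly two predecessors):

    BasicBlock *IfTrue = 0, *IfFalse = 0;
    if (Value *Cond = GetIfCondition(MergeBB, IfTrue, IfFalse)) {
      // Cond dominates MergeBB: control reaches MergeBB through IfTrue
      // when Cond is true and through IfFalse when it is false.
    }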
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 8513772..0e7f7f7 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -19,9 +19,9 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
@@ -44,7 +44,6 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addPreserved<LoopInfo>();
- AU.addPreserved<ProfileInfo>();
// No loop canonicalization guarantees are broken by this pass.
AU.addPreservedID(LoopSimplifyID);
@@ -84,39 +83,6 @@ bool BreakCriticalEdges::runOnFunction(Function &F) {
// Implementation of the external critical edge manipulation functions
//===----------------------------------------------------------------------===//
-// isCriticalEdge - Return true if the specified edge is a critical edge.
-// Critical edges are edges from a block with multiple successors to a block
-// with multiple predecessors.
-//
-bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
- bool AllowIdenticalEdges) {
- assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
- if (TI->getNumSuccessors() == 1) return false;
-
- const BasicBlock *Dest = TI->getSuccessor(SuccNum);
- const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest);
-
- // If there is more than one predecessor, this is a critical edge...
- assert(I != E && "No preds, but we have an edge to the block?");
- const BasicBlock *FirstPred = *I;
- ++I; // Skip one edge due to the incoming arc from TI.
- if (!AllowIdenticalEdges)
- return I != E;
-
- // If AllowIdenticalEdges is true, then we allow this edge to be considered
- // non-critical iff all preds come from TI's block.
- while (I != E) {
- const BasicBlock *P = *I;
- if (P != FirstPred)
- return true;
- // Note: leave this as is until no one ever compiles with either gcc 4.0.1
- // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207
- E = pred_end(P);
- ++I;
- }
- return false;
-}
-
/// createPHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
/// may require new PHIs in the new exit block. This function inserts the
/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB
@@ -245,10 +211,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
- ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
// If we have nothing to update, just return.
- if (DT == 0 && LI == 0 && PI == 0)
+ if (DT == 0 && LI == 0)
return NewBB;
// Now update analysis information. Since the only predecessor of NewBB is
@@ -401,9 +366,5 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
}
- // Update ProfileInfo if it is around.
- if (PI)
- PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges);
-
return NewBB;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index be8d39e..d105f5e 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -78,7 +78,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo,
- ValueMapTypeRemapper *TypeMapper) {
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
assert(NameSuffix && "NameSuffix cannot be null!");
#ifndef NDEBUG
@@ -147,7 +148,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
RemapInstruction(II, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper);
+ TypeMapper, Materializer);
}
/// CloneFunction - Return a copy of the specified function, but without
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index f7c659f..6f008644 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -277,8 +277,8 @@ void CodeExtractor::splitReturnBlocks() {
DomTreeNode *NewNode = DT->addNewBlock(New, *I);
- for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(),
- E = Children.end(); I != E; ++I)
+ for (SmallVectorImpl<DomTreeNode *>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
DT->changeImmediateDominator(*I, NewNode);
}
}
@@ -665,8 +665,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
TheSwitch->setCondition(call);
TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
// Remove redundant case
- SwitchInst::CaseIt ToBeRemoved(TheSwitch, NumExitBlocks-1);
- TheSwitch->removeCase(ToBeRemoved);
+ TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
break;
}
}
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index db525cd..0723b35 100644
--- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -10,6 +10,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
new file mode 100644
index 0000000..1da226b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -0,0 +1,486 @@
+//===- FlattenCFG.cpp - Code to perform CFG flattening ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Reduce conditional branches in the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "flattencfg"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+namespace {
+class FlattenCFGOpt {
+ AliasAnalysis *AA;
+ /// \brief Use parallel-and or parallel-or to generate conditions for
+ /// conditional branches.
+ bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
+ /// \brief If \param BB is the merge block of an if-region, attempt to merge
+ /// the if-region with an adjacent if-region upstream if two if-regions
+ /// contain identical instructions.
+ bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
+ /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which
+ /// are from two if-regions whose entry blocks are \p Head1 and \p
+ /// Head2. \returns true if \p Block1 and \p Block2 contain identical
+ /// instructions, and have no memory reference alias with \p Head2.
+ /// This is used as a legality check for merging if-regions.
+ bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
+ BasicBlock *Block1, BasicBlock *Block2);
+
+public:
+ FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {}
+ bool run(BasicBlock *BB);
+};
+}
+
+/// If \param [in] BB has more than one predecessor that ends in a
+/// conditional branch, attempt to combine the branch conditions with a
+/// parallel and/or. \returns true on success.
+///
+/// Before:
+/// ......
+/// %cmp10 = fcmp une float %tmp1, %tmp2
+/// br i1 %cmp10, label %if.then, label %lor.rhs
+///
+/// lor.rhs:
+/// ......
+/// %cmp11 = fcmp une float %tmp3, %tmp4
+/// br i1 %cmp11, label %if.then, label %if.end
+///
+/// if.end: // the merge block
+/// ......
+///
+/// if.then: // has two predecessors, both of which end with a conditional branch.
+/// ......
+/// br label %if.end;
+///
+/// After:
+/// ......
+/// %cmp10 = fcmp une float %tmp1, %tmp2
+/// ......
+/// %cmp11 = fcmp une float %tmp3, %tmp4
+/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode.
+/// br i1 %cmp12, label %if.then, label %if.end
+///
+/// if.end:
+/// ......
+///
+/// if.then:
+/// ......
+/// br label %if.end;
+///
+/// Current implementation handles two cases.
+/// Case 1: \param BB is on the else-path.
+///
+/// BB1
+/// / |
+/// BB2 |
+/// / \ |
+/// BB3 \ | where BB1 and BB2 contain conditional branches.
+/// \ | / BB3 contains an unconditional branch.
+/// \ | / BB4 corresponds to \param BB, which is also the merge block.
+/// BB => BB4
+///
+///
+/// Corresponding source code:
+///
+/// if (a == b && c == d)
+/// statement; // BB3
+///
+/// Case 2: \param BB is on the then-path.
+///
+/// BB1
+/// / |
+/// | BB2
+/// \ / | where BB1, BB2 contain conditional branches.
+/// BB => BB3 | BB3 contains an unconditional branch and corresponds
+/// \ / to \param BB. BB4 is the merge.
+/// BB4
+///
+/// Corresponding source code:
+///
+/// if (a == b || c == d)
+/// statement; // BB3
+///
+/// In both cases, \param BB is the common successor of conditional branches.
+/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
+/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches
+/// as its predecessors.
+///
+bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
+ Pass *P) {
+ PHINode *PHI = dyn_cast<PHINode>(BB->begin());
+ if (PHI)
+ return false; // For simplicity, avoid cases containing PHI nodes.
+
+ BasicBlock *LastCondBlock = NULL;
+ BasicBlock *FirstCondBlock = NULL;
+ BasicBlock *UnCondBlock = NULL;
+ int Idx = -1;
+
+ // Check predecessors of \param BB.
+ SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+ for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+ BasicBlock *Pred = *PI;
+ BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());
+
+ // All predecessors should terminate with a branch.
+ if (!PBI)
+ return false;
+
+ BasicBlock *PP = Pred->getSinglePredecessor();
+
+ if (PBI->isUnconditional()) {
+ // Case 1: Pred (BB3) is an unconditional block; it should
+ // have a single predecessor (BB2) that is also a predecessor
+ // of \param BB (BB4), and its address should not be taken.
+ // There should exist only one such unconditional
+ // branch among the predecessors.
+ if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
+ Pred->hasAddressTaken())
+ return false;
+
+ UnCondBlock = Pred;
+ continue;
+ }
+
+ // Only conditional branches are allowed beyond this point.
+ assert(PBI->isConditional());
+
+ // Condition's unique use should be the branch instruction.
+ Value *PC = PBI->getCondition();
+ if (!PC || !PC->hasOneUse())
+ return false;
+
+ if (PP && Preds.count(PP)) {
+ // These are internal condition blocks to be merged from, e.g.,
+ // BB2 in both cases.
+ // Should not be address-taken.
+ if (Pred->hasAddressTaken())
+ return false;
+
+ // Instructions in the internal condition blocks should be safe
+ // to hoist up.
+ for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) {
+ Instruction *CI = BI++;
+ if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
+ return false;
+ }
+ } else {
+ // This is the condition block to be merged into, e.g. BB1 in
+ // both cases.
+ if (FirstCondBlock)
+ return false;
+ FirstCondBlock = Pred;
+ }
+
+ // Find whether BB is uniformly on the true (or false) path
+ // for all of its predecessors.
+ BasicBlock *PS1 = PBI->getSuccessor(0);
+ BasicBlock *PS2 = PBI->getSuccessor(1);
+ BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
+ int CIdx = (PS1 == BB) ? 0 : 1;
+
+ if (Idx == -1)
+ Idx = CIdx;
+ else if (CIdx != Idx)
+ return false;
+
+ // PS is the successor which is not BB. Check successors to identify
+ // the last conditional branch.
+ if (Preds.count(PS) == 0) {
+ // Case 2.
+ LastCondBlock = Pred;
+ } else {
+ // Case 1
+ BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
+ if (BPS && BPS->isUnconditional()) {
+ // Case 1: PS (BB3) should end with an unconditional branch.
+ LastCondBlock = Pred;
+ }
+ }
+ }
+
+ if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
+ return false;
+
+ TerminatorInst *TBB = LastCondBlock->getTerminator();
+ BasicBlock *PS1 = TBB->getSuccessor(0);
+ BasicBlock *PS2 = TBB->getSuccessor(1);
+ BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
+ BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());
+
+ // If PS1 does not jump into PS2, but PS2 jumps into PS1,
+ // attempt branch inversion.
+ if (!PBI1 || !PBI1->isUnconditional() ||
+ (PS1->getTerminator()->getSuccessor(0) != PS2)) {
+ // Check whether PS2 jumps into PS1.
+ if (!PBI2 || !PBI2->isUnconditional() ||
+ (PS2->getTerminator()->getSuccessor(0) != PS1))
+ return false;
+
+ // Do branch inversion.
+ BasicBlock *CurrBlock = LastCondBlock;
+ bool EverChanged = false;
+ while (1) {
+ BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
+ CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
+ if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
+ CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
+ BI->swapSuccessors();
+ EverChanged = true;
+ }
+ if (CurrBlock == FirstCondBlock)
+ break;
+ CurrBlock = CurrBlock->getSinglePredecessor();
+ }
+ return EverChanged;
+ }
+
+ // PS1 must end with an unconditional branch.
+ if (!PBI1 || !PBI1->isUnconditional())
+ return false;
+
+ // PS2 should not contain a PHI node.
+ PHI = dyn_cast<PHINode>(PS2->begin());
+ if (PHI)
+ return false;
+
+ // Do the transformation.
+ BasicBlock *CB;
+ BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
+ bool Iteration = true;
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+ Value *PC = PBI->getCondition();
+
+ do {
+ CB = PBI->getSuccessor(1 - Idx);
+ // Delete the conditional branch.
+ FirstCondBlock->getInstList().pop_back();
+ FirstCondBlock->getInstList()
+ .splice(FirstCondBlock->end(), CB->getInstList());
+ PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
+ Value *CC = PBI->getCondition();
+ // Merge conditions.
+ Builder.SetInsertPoint(PBI);
+ Value *NC;
+ if (Idx == 0)
+ // Case 2, use parallel or.
+ NC = Builder.CreateOr(PC, CC);
+ else
+ // Case 1, use parallel and.
+ NC = Builder.CreateAnd(PC, CC);
+
+ PBI->replaceUsesOfWith(CC, NC);
+ PC = NC;
+ if (CB == LastCondBlock)
+ Iteration = false;
+ // Remove internal conditional branches.
+ CB->dropAllReferences();
+ // Make CB unreachable and let a downstream pass delete the block.
+ new UnreachableInst(CB->getContext(), CB);
+ } while (Iteration);
+
+ DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
+ return true;
+}
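In source terms the rewrite above amounts to the following (a hedged sketch; note the bitwise, non-short-circuit operators, which is why instructions in the merged blocks must be safe to speculatively execute):

    // Case 1 (parallel-and):             Case 2 (parallel-or):
    //   if (a == b)                        if (a == b) stmt;
    //     if (c == d) stmt;                else if (c == d) stmt;
    // becomes                            becomes
    //   if ((a == b) & (c == d)) stmt;     if ((a == b) | (c == d)) stmt;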
+
+/// Compare a pair of blocks from two if-regions, where \param Head1 is the
+/// entry block of the 1st if-region and \param Head2 is the entry block of
+/// the 2nd if-region. \param Block1 is a block in the 1st if-region and
+/// \param Block2 is a block in the 2nd if-region to compare. \returns true
+/// if \param Block1 and \param Block2 contain identical instructions and
+/// have no memory references that alias with \param Head2.
+///
+bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
+ BasicBlock *Block1,
+ BasicBlock *Block2) {
+ TerminatorInst *PTI2 = Head2->getTerminator();
+ Instruction *PBI2 = Head2->begin();
+
+ bool eq1 = (Block1 == Head1);
+ bool eq2 = (Block2 == Head2);
+ if (eq1 || eq2) {
+ // An empty then-path or else-path.
+ return (eq1 == eq2);
+ }
+
+ // Check whether instructions in Block1 and Block2 are identical
+ // and do not alias with instructions in Head2.
+ BasicBlock::iterator iter1 = Block1->begin();
+ BasicBlock::iterator end1 = Block1->getTerminator();
+ BasicBlock::iterator iter2 = Block2->begin();
+ BasicBlock::iterator end2 = Block2->getTerminator();
+
+ while (1) {
+ if (iter1 == end1) {
+ if (iter2 != end2)
+ return false;
+ break;
+ }
+
+ if (!iter1->isIdenticalTo(iter2))
+ return false;
+
+ // Illegal to remove instructions with side effects except
+ // non-volatile stores.
+ if (iter1->mayHaveSideEffects()) {
+ Instruction *CurI = &*iter1;
+ StoreInst *SI = dyn_cast<StoreInst>(CurI);
+ if (!SI || SI->isVolatile())
+ return false;
+ }
+
+ // For simplicity and speed, data dependency check can be
+ // avoided if read from memory doesn't exist.
+ if (iter1->mayReadFromMemory())
+ return false;
+
+ if (iter1->mayWriteToMemory()) {
+ for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
+ if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
+ // Check alias with Head2.
+ if (!AA || AA->alias(iter1, BI))
+ return false;
+ }
+ }
+ }
+ ++iter1;
+ ++iter2;
+ }
+
+ return true;
+}
+
+/// Check whether \param BB is the merge block of an if-region. If it is,
+/// check whether there exists an adjacent if-region upstream whose blocks
+/// contain identical instructions, so that the two if-regions can be
+/// legally merged. \returns true if the two if-regions are merged.
+///
+/// From:
+/// if (a)
+/// statement;
+/// if (b)
+/// statement;
+///
+/// To:
+/// if (a || b)
+/// statement;
+///
+bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder,
+ Pass *P) {
+ BasicBlock *IfTrue2, *IfFalse2;
+ Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
+ Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2);
+ if (!CInst2)
+ return false;
+
+ BasicBlock *SecondEntryBlock = CInst2->getParent();
+ if (SecondEntryBlock->hasAddressTaken())
+ return false;
+
+ BasicBlock *IfTrue1, *IfFalse1;
+ Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
+ Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1);
+ if (!CInst1)
+ return false;
+
+ BasicBlock *FirstEntryBlock = CInst1->getParent();
+
+ // Either then-path or else-path should be empty.
+ if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock))
+ return false;
+ if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
+ return false;
+
+ TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
+ Instruction *PBI2 = SecondEntryBlock->begin();
+
+ if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
+ IfTrue2))
+ return false;
+
+ if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1,
+ IfFalse2))
+ return false;
+
+ // Check that \param SecondEntryBlock has no side effects and is safe to
+ // speculate.
+ for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
+ Instruction *CI = BI;
+ if (isa<PHINode>(CI) || CI->mayHaveSideEffects() ||
+ !isSafeToSpeculativelyExecute(CI))
+ return false;
+ }
+
+ // Merge \param SecondEntryBlock into \param FirstEntryBlock.
+ FirstEntryBlock->getInstList().pop_back();
+ FirstEntryBlock->getInstList()
+ .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
+ BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator());
+ Value *CC = PBI->getCondition();
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ Builder.SetInsertPoint(PBI);
+ Value *NC = Builder.CreateOr(CInst1, CC);
+ PBI->replaceUsesOfWith(CC, NC);
+ Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ // Remove IfTrue1
+ if (IfTrue1 != FirstEntryBlock) {
+ IfTrue1->dropAllReferences();
+ IfTrue1->eraseFromParent();
+ }
+
+ // Remove IfFalse1
+ if (IfFalse1 != FirstEntryBlock) {
+ IfFalse1->dropAllReferences();
+ IfFalse1->eraseFromParent();
+ }
+
+ // Remove \param SecondEntryBlock
+ SecondEntryBlock->dropAllReferences();
+ SecondEntryBlock->eraseFromParent();
+ DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
+ return true;
+}
+
+bool FlattenCFGOpt::run(BasicBlock *BB) {
+ bool Changed = false;
+ assert(BB && BB->getParent() && "Block not embedded in function!");
+ assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+ IRBuilder<> Builder(BB);
+
+ if (FlattenParallelAndOr(BB, Builder))
+ return true;
+
+ if (MergeIfRegion(BB, Builder))
+ return true;
+
+ return Changed;
+}
+
+/// FlattenCFG - This function is used to flatten a CFG. For example, it
+/// uses parallel-and and parallel-or mode to collapse if-conditions and
+/// merge if-regions with identical statements.
+///
+bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) {
+ return FlattenCFGOpt(AA).run(BB);
+}
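A hedged driver sketch, iterating to a fixed point the way a pass might (a real driver must also be careful about blocks that MergeIfRegion erases):

    bool EverChanged = false, LocalChange;
    do {
      LocalChange = false;
      for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
        LocalChange |= FlattenCFG(BB, AA);  // iterator converts to BasicBlock*
      EverChanged |= LocalChange;
    } while (LocalChange);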
diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
new file mode 100644
index 0000000..5f0a563
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -0,0 +1,183 @@
+//===-- GlobalStatus.cpp - Compute status info for globals ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+
+using namespace llvm;
+
+/// Return the stronger of the two orderings. If the two orderings are acquire
+/// and release, then return AcquireRelease.
+///
+static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
+ if (X == Acquire && Y == Release)
+ return AcquireRelease;
+ if (Y == Acquire && X == Release)
+ return AcquireRelease;
+ return (AtomicOrdering)std::max(X, Y);
+}
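For example (assuming the AtomicOrdering enumerators increase in strength, which the std::max above relies on):

    assert(strongerOrdering(Acquire, Release) == AcquireRelease); // special case
    assert(strongerOrdering(Monotonic, Acquire) == Acquire);      // plain max
    assert(strongerOrdering(NotAtomic, Release) == Release);      // plain max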
+
+/// It is safe to destroy a constant iff it is only used by other constants.
+/// Note that constants cannot be cyclic, so this test is pretty easy to
+/// implement recursively.
+///
+bool llvm::isSafeToDestroyConstant(const Constant *C) {
+ if (isa<GlobalValue>(C))
+ return false;
+
+ for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
+ ++UI)
+ if (const Constant *CU = dyn_cast<Constant>(*UI)) {
+ if (!isSafeToDestroyConstant(CU))
+ return false;
+ } else
+ return false;
+ return true;
+}
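A typical client pattern (a hedged, GlobalOpt-style sketch; `C` is some Constant that may have become dead):

    C->removeDeadConstantUsers();            // drop dead constant exprs first
    if (C->use_empty() && isSafeToDestroyConstant(C))
      C->destroyConstant();                  // safe: C is not a GlobalValue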
+
+static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
+ SmallPtrSet<const PHINode *, 16> &PhiUsers) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const User *U = *UI;
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ GS.HasNonInstructionUser = true;
+
+ // If the result of the constantexpr isn't a pointer type, then we won't
+ // know where to expect it in various places. Just reject early.
+ if (!isa<PointerType>(CE->getType()))
+ return true;
+
+ if (analyzeGlobalAux(CE, GS, PhiUsers))
+ return true;
+ } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
+ if (!GS.HasMultipleAccessingFunctions) {
+ const Function *F = I->getParent()->getParent();
+ if (GS.AccessingFunction == 0)
+ GS.AccessingFunction = F;
+ else if (GS.AccessingFunction != F)
+ GS.HasMultipleAccessingFunctions = true;
+ }
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ GS.IsLoaded = true;
+ // Don't hack on volatile loads.
+ if (LI->isVolatile())
+ return true;
+ GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering());
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Don't allow a store OF the address, only stores TO the address.
+ if (SI->getOperand(0) == V)
+ return true;
+
+ // Don't hack on volatile stores.
+ if (SI->isVolatile())
+ return true;
+
+ GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());
+
+ // If this is a direct store to the global (i.e., the global is a scalar
+ // value, not an aggregate), keep more specific information about
+ // stores.
+ if (GS.StoredType != GlobalStatus::Stored) {
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(SI->getOperand(1))) {
+ Value *StoredVal = SI->getOperand(0);
+
+ if (Constant *C = dyn_cast<Constant>(StoredVal)) {
+ if (C->isThreadDependent()) {
+ // The stored value changes between threads; don't track it.
+ return true;
+ }
+ }
+
+ if (StoredVal == GV->getInitializer()) {
+ if (GS.StoredType < GlobalStatus::InitializerStored)
+ GS.StoredType = GlobalStatus::InitializerStored;
+ } else if (isa<LoadInst>(StoredVal) &&
+ cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
+ if (GS.StoredType < GlobalStatus::InitializerStored)
+ GS.StoredType = GlobalStatus::InitializerStored;
+ } else if (GS.StoredType < GlobalStatus::StoredOnce) {
+ GS.StoredType = GlobalStatus::StoredOnce;
+ GS.StoredOnceValue = StoredVal;
+ } else if (GS.StoredType == GlobalStatus::StoredOnce &&
+ GS.StoredOnceValue == StoredVal) {
+ // noop.
+ } else {
+ GS.StoredType = GlobalStatus::Stored;
+ }
+ } else {
+ GS.StoredType = GlobalStatus::Stored;
+ }
+ }
+ } else if (isa<BitCastInst>(I)) {
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<GetElementPtrInst>(I)) {
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<SelectInst>(I)) {
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
+ // PHI nodes we can check just like select or GEP instructions, but we
+ // have to be careful about infinite recursion.
+ if (PhiUsers.insert(PN)) // Not already visited.
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<CmpInst>(I)) {
+ GS.IsCompared = true;
+ } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+ if (MTI->isVolatile())
+ return true;
+ if (MTI->getArgOperand(0) == V)
+ GS.StoredType = GlobalStatus::Stored;
+ if (MTI->getArgOperand(1) == V)
+ GS.IsLoaded = true;
+ } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
+ assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
+ if (MSI->isVolatile())
+ return true;
+ GS.StoredType = GlobalStatus::Stored;
+ } else if (ImmutableCallSite C = I) {
+ if (!C.isCallee(UI))
+ return true;
+ GS.IsLoaded = true;
+ } else {
+ return true; // Any other non-load instruction might take the address!
+ }
+ } else if (const Constant *C = dyn_cast<Constant>(U)) {
+ GS.HasNonInstructionUser = true;
+ // We might have a dead and dangling constant hanging off of here.
+ if (!isSafeToDestroyConstant(C))
+ return true;
+ } else {
+ GS.HasNonInstructionUser = true;
+ // Otherwise must be some other user.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) {
+ SmallPtrSet<const PHINode *, 16> PhiUsers;
+ return analyzeGlobalAux(V, GS, PhiUsers);
+}
+
+GlobalStatus::GlobalStatus()
+ : IsCompared(false), IsLoaded(false), StoredType(NotStored),
+ StoredOnceValue(0), AccessingFunction(0),
+ HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
+ Ordering(NotAtomic) {}
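A minimal client sketch (assuming `GV` is some GlobalVariable of interest; a false return from analyzeGlobal means the address is not taken and GS is meaningful):

    GlobalStatus GS;
    if (!GlobalStatus::analyzeGlobal(GV, GS)) {
      bool SingleStore = GS.StoredType == GlobalStatus::StoredOnce;
      bool NeverRead   = !GS.IsLoaded;
      // SingleStore: GV is written exactly once, with GS.StoredOnceValue.
      // NeverRead:   no load of GV was seen; its stores may be deletable.
    }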
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index dabb67b9..d021bce 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -193,7 +193,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
CallInst *CI = dyn_cast<CallInst>(I);
// If this call cannot unwind, don't convert it to an invoke.
- if (!CI || CI->doesNotThrow())
+ // Inline asm calls cannot throw.
+ if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
continue;
// Convert this function call into an invoke instruction. First, split the
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index 2d1b166..f15e8d5 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -55,7 +55,6 @@ namespace {
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
- std::vector<BasicBlock*> LoopBlocks;
PredIteratorCache PredCache;
Loop *L;
@@ -82,11 +81,6 @@ namespace {
// Check the special guarantees that LCSSA makes.
assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!");
}
-
- /// inLoop - returns true if the given block is within the current loop
- bool inLoop(BasicBlock *B) const {
- return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
- }
};
}
@@ -129,11 +123,6 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
if (ExitBlocks.empty())
return false;
- // Speed up queries by creating a sorted vector of blocks.
- LoopBlocks.clear();
- LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
- array_pod_sort(LoopBlocks.begin(), LoopBlocks.end());
-
// Look at all the instructions in the loop, checking to see if they have uses
// outside the loop. If so, rewrite those uses.
bool MadeChange = false;
@@ -198,7 +187,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
if (PHINode *PN = dyn_cast<PHINode>(U))
UserBB = PN->getIncomingBlock(UI);
- if (InstBB != UserBB && !inLoop(UserBB))
+ if (InstBB != UserBB && !L->contains(UserBB))
UsesToRewrite.push_back(&UI.getUse());
}
@@ -244,7 +233,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
// If the exit block has a predecessor not within the loop, arrange for
// the incoming value use corresponding to that predecessor to be
// rewritten in terms of a different LCSSA PHI.
- if (!inLoop(*PI))
+ if (!L->contains(*PI))
UsesToRewrite.push_back(
&PN->getOperandUse(
PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index 12e5b3e..2768041 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -16,10 +16,10 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/DIBuilder.h"
#include "llvm/DebugInfo.h"
@@ -43,6 +43,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
+
//===----------------------------------------------------------------------===//
// Local constant propagation.
//
@@ -84,7 +86,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
BI->eraseFromParent();
return true;
}
-
+
if (Dest2 == Dest1) { // Conditional branch to same location?
// This branch matches something like this:
// br bool %cond, label %Dest, label %Dest
@@ -104,7 +106,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
}
return false;
}
-
+
if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
// If we are switching on a constant, we can convert the switch into a
// single branch instruction!
@@ -188,38 +190,33 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
return true;
}
-
+
if (SI->getNumCases() == 1) {
// Otherwise, we can fold this switch into a conditional branch
// instruction if it has only one non-default destination.
SwitchInst::CaseIt FirstCase = SI->case_begin();
- IntegersSubset& Case = FirstCase.getCaseValueEx();
- if (Case.isSingleNumber()) {
- // FIXME: Currently work with ConstantInt based numbers.
- Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
- Case.getSingleNumber(0).toConstantInt(),
- "cond");
-
- // Insert the new branch.
- BranchInst *NewBr = Builder.CreateCondBr(Cond,
- FirstCase.getCaseSuccessor(),
- SI->getDefaultDest());
- MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
- if (MD && MD->getNumOperands() == 3) {
- ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2));
- ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1));
- assert(SICase && SIDef);
- // The TrueWeight should be the weight for the single case of SI.
- NewBr->setMetadata(LLVMContext::MD_prof,
- MDBuilder(BB->getContext()).
- createBranchWeights(SICase->getValue().getZExtValue(),
- SIDef->getValue().getZExtValue()));
- }
+ Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
+ FirstCase.getCaseValue(), "cond");
- // Delete the old switch.
- SI->eraseFromParent();
- return true;
+ // Insert the new branch.
+ BranchInst *NewBr = Builder.CreateCondBr(Cond,
+ FirstCase.getCaseSuccessor(),
+ SI->getDefaultDest());
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ if (MD && MD->getNumOperands() == 3) {
+ ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2));
+ ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1));
+ assert(SICase && SIDef);
+ // The TrueWeight should be the weight for the single case of SI.
+ NewBr->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(SICase->getValue().getZExtValue(),
+ SIDef->getValue().getZExtValue()));
}
+
+ // Delete the old switch.
+ SI->eraseFromParent();
+ return true;
}
return false;
}
@@ -231,7 +228,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
BasicBlock *TheOnlyDest = BA->getBasicBlock();
// Insert the new branch.
Builder.CreateBr(TheOnlyDest);
-
+
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
if (IBI->getDestination(i) == TheOnlyDest)
TheOnlyDest = 0;
@@ -242,7 +239,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
IBI->eraseFromParent();
if (DeleteDeadConditions)
RecursivelyDeleteTriviallyDeadInstructions(Address, TLI);
-
+
// If we didn't find our destination in the IBI successor list, then we
// have undefined behavior. Replace the unconditional branch with an
// 'unreachable' instruction.
@@ -250,11 +247,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
BB->getTerminator()->eraseFromParent();
new UnreachableInst(BB->getContext(), BB);
}
-
+
return true;
}
}
-
+
return false;
}
@@ -321,10 +318,10 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
Instruction *I = dyn_cast<Instruction>(V);
if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI))
return false;
-
+
SmallVector<Instruction*, 16> DeadInsts;
DeadInsts.push_back(I);
-
+
do {
I = DeadInsts.pop_back_val();
@@ -333,9 +330,9 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
Value *OpV = I->getOperand(i);
I->setOperand(i, 0);
-
+
if (!OpV->use_empty()) continue;
-
+
// If the operand is an instruction that became dead as we nulled out the
// operand, and if it is 'trivially' dead, delete it in a future loop
// iteration.
@@ -343,7 +340,7 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
if (isInstructionTriviallyDead(OpI, TLI))
DeadInsts.push_back(OpI);
}
-
+
I->eraseFromParent();
} while (!DeadInsts.empty());
@@ -415,7 +412,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
Instruction *Inst = BI++;
WeakVH BIHandle(BI);
- if (recursivelySimplifyInstruction(Inst, TD)) {
+ if (recursivelySimplifyInstruction(Inst, TD, TLI)) {
MadeChange = true;
if (BIHandle != BI)
BI = BB->begin();
@@ -450,12 +447,12 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
// This only adjusts blocks with PHI nodes.
if (!isa<PHINode>(BB->begin()))
return;
-
+
// Remove the entries for Pred from the PHI nodes in BB, but do not simplify
// them down. This will leave us with single entry phi nodes and other phis
// that can be removed.
BB->removePredecessor(Pred, true);
-
+
WeakVH PhiIt = &BB->front();
while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
@@ -486,10 +483,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
PN->replaceAllUsesWith(NewVal);
PN->eraseFromParent();
}
-
+
BasicBlock *PredBB = DestBB->getSinglePredecessor();
assert(PredBB && "Block doesn't have a single predecessor!");
-
+
// Zap anything that took the address of DestBB. Not doing this will give the
// address an invalid value.
if (DestBB->hasAddressTaken()) {
@@ -500,10 +497,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
BA->getType()));
BA->destroyConstant();
}
-
+
// Anything that branched to PredBB now branches to DestBB.
PredBB->replaceAllUsesWith(DestBB);
-
+
// Splice all the instructions from PredBB to DestBB.
PredBB->getTerminator()->eraseFromParent();
DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
@@ -515,25 +512,27 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
DT->changeImmediateDominator(DestBB, PredBBIDom);
DT->eraseNode(PredBB);
}
- ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
- if (PI) {
- PI->replaceAllUses(PredBB, DestBB);
- PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB));
- }
}
// Nuke BB.
PredBB->eraseFromParent();
}
+/// CanMergeValues - Return true if we can choose one of these values to use
+/// in place of the other. Note that we will always choose the non-undef
+/// value to keep.
+static bool CanMergeValues(Value *First, Value *Second) {
+ return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second);
+}
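Spelled out, the helper behaves as follows:

    // CanMergeValues(V, V)        -> true   (identical)
    // CanMergeValues(undef, V)    -> true   (the non-undef V is kept)
    // CanMergeValues(V, undef)    -> true   (the non-undef V is kept)
    // CanMergeValues(V1, V2)      -> false  (distinct, both defined)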
+
/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
-/// almost-empty BB ending in an unconditional branch to Succ, into succ.
+/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
///
/// Assumption: Succ is the single successor for BB.
///
static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
- DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
+ DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
<< Succ->getName() << "\n");
// Shortcut, if there is only a single predecessor it must be BB and merging
// is always safe
@@ -555,9 +554,10 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
BasicBlock *IBB = PN->getIncomingBlock(PI);
if (BBPreds.count(IBB) &&
- BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) {
- DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
- << Succ->getName() << " is conflicting with "
+ !CanMergeValues(BBPN->getIncomingValueForBlock(IBB),
+ PN->getIncomingValue(PI))) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with "
<< BBPN->getName() << " with regard to common predecessor "
<< IBB->getName() << "\n");
return false;
@@ -570,8 +570,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
// one for BB, in which case this phi node will not prevent the merging
// of the block.
BasicBlock *IBB = PN->getIncomingBlock(PI);
- if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) {
- DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ if (BBPreds.count(IBB) &&
+ !CanMergeValues(Val, PN->getIncomingValue(PI))) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with regard to common "
<< "predecessor " << IBB->getName() << "\n");
return false;
@@ -583,6 +584,139 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
return true;
}
+typedef SmallVector<BasicBlock *, 16> PredBlockVector;
+typedef DenseMap<BasicBlock *, Value *> IncomingValueMap;
+
+/// \brief Determines the value to use as the phi node input for a block.
+///
+/// Select between \p OldVal and any value that we know flows from \p BB
+/// to a particular phi on the basis of which one (if either) is not
+/// undef. Update IncomingValues based on the selected value.
+///
+/// \param OldVal The value we are considering selecting.
+/// \param BB The block that the value flows in from.
+/// \param IncomingValues A map from block-to-value for other phi inputs
+/// that we have examined.
+///
+/// \returns the selected value.
+static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB,
+ IncomingValueMap &IncomingValues) {
+ if (!isa<UndefValue>(OldVal)) {
+ assert((!IncomingValues.count(BB) ||
+ IncomingValues.find(BB)->second == OldVal) &&
+ "Expected OldVal to match incoming value from BB!");
+
+ IncomingValues.insert(std::make_pair(BB, OldVal));
+ return OldVal;
+ }
+
+ IncomingValueMap::const_iterator It = IncomingValues.find(BB);
+ if (It != IncomingValues.end()) return It->second;
+
+ return OldVal;
+}
+
+/// \brief Create a map from block to value for the operands of a
+/// given phi.
+///
+/// Create a map from block to value for each non-undef value flowing
+/// into \p PN.
+///
+/// \param PN The phi we are collecting the map for.
+/// \param IncomingValues [out] The map from block to value for this phi.
+static void gatherIncomingValuesToPhi(PHINode *PN,
+ IncomingValueMap &IncomingValues) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *BB = PN->getIncomingBlock(i);
+ Value *V = PN->getIncomingValue(i);
+
+ if (!isa<UndefValue>(V))
+ IncomingValues.insert(std::make_pair(BB, V));
+ }
+}
+
+/// \brief Replace the incoming undef values to a phi with the values
+/// from a block-to-value map.
+///
+/// \param PN The phi we are replacing the undefs in.
+/// \param IncomingValues A map from block to value.
+static void replaceUndefValuesInPhi(PHINode *PN,
+ const IncomingValueMap &IncomingValues) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+
+ if (!isa<UndefValue>(V)) continue;
+
+ BasicBlock *BB = PN->getIncomingBlock(i);
+ IncomingValueMap::const_iterator It = IncomingValues.find(BB);
+ if (It == IncomingValues.end()) continue;
+
+ PN->setIncomingValue(i, It->second);
+ }
+}
+
+/// \brief Replace a value flowing from a block to a phi with
+/// potentially multiple instances of that value flowing from the
+/// block's predecessors to the phi.
+///
+/// \param BB The block with the value flowing into the phi.
+/// \param BBPreds The predecessors of BB.
+/// \param PN The phi that we are updating.
+static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
+ const PredBlockVector &BBPreds,
+ PHINode *PN) {
+ Value *OldVal = PN->removeIncomingValue(BB, false);
+ assert(OldVal && "No entry in PHI for Pred BB!");
+
+ IncomingValueMap IncomingValues;
+
+ // We are merging two blocks - BB, and the block containing PN - and
+ // as a result we need to redirect edges from the predecessors of BB
+ // to go to the block containing PN, and update PN
+ // accordingly. Since we allow merging blocks in the case where the
+ // predecessor and successor blocks both share some predecessors,
+ // and where some of those common predecessors might have undef
+ // values flowing into PN, we want to rewrite those values to be
+ // consistent with the non-undef values.
+
+ gatherIncomingValuesToPhi(PN, IncomingValues);
+
+ // If this incoming value is one of the PHI nodes in BB, the new entries
+ // in the PHI node are the entries from the old PHI.
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
+ PHINode *OldValPN = cast<PHINode>(OldVal);
+ for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) {
+ // Note that, since we are merging phi nodes and BB and Succ might
+ // have common predecessors, we could end up with a phi node with
+ // identical incoming branches. This will be cleaned up later (and
+ // will trigger asserts if we try to clean it up now, without also
+ // simplifying the corresponding conditional branch).
+ BasicBlock *PredBB = OldValPN->getIncomingBlock(i);
+ Value *PredVal = OldValPN->getIncomingValue(i);
+ Value *Selected = selectIncomingValueForBlock(PredVal, PredBB,
+ IncomingValues);
+
+ // And add a new incoming value for this predecessor for the
+ // newly retargeted branch.
+ PN->addIncoming(Selected, PredBB);
+ }
+ } else {
+ for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) {
+ // Update existing incoming values in PN for this
+ // predecessor of BB.
+ BasicBlock *PredBB = BBPreds[i];
+ Value *Selected = selectIncomingValueForBlock(OldVal, PredBB,
+ IncomingValues);
+
+ // And add a new incoming value for this predecessor for the
+ // newly retargeted branch.
+ PN->addIncoming(Selected, PredBB);
+ }
+ }
+
+ replaceUndefValuesInPhi(PN, IncomingValues);
+}
+
/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
/// unconditional branch, and contains no instructions other than PHI nodes,
/// potential side-effect free intrinsics and the branch. If possible,
@@ -595,7 +729,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
// We can't eliminate infinite loops.
BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
if (BB == Succ) return false;
-
+
// Check to see if merging these blocks would cause conflicts for any of the
// phi nodes in BB or Succ. If not, we can safely merge.
if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
@@ -629,39 +763,21 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
}
DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
-
+
if (isa<PHINode>(Succ->begin())) {
// If there is more than one pred of succ, and there are PHI nodes in
// the successor, then we need to add incoming edges for the PHI nodes
//
- const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
-
+ const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB));
+
// Loop over all of the PHI nodes in the successor of BB.
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
PHINode *PN = cast<PHINode>(I);
- Value *OldVal = PN->removeIncomingValue(BB, false);
- assert(OldVal && "No entry in PHI for Pred BB!");
-
- // If this incoming value is one of the PHI nodes in BB, the new entries
- // in the PHI node are the entries from the old PHI.
- if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
- PHINode *OldValPN = cast<PHINode>(OldVal);
- for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
- // Note that, since we are merging phi nodes and BB and Succ might
- // have common predecessors, we could end up with a phi node with
- // identical incoming branches. This will be cleaned up later (and
- // will trigger asserts if we try to clean it up now, without also
- // simplifying the corresponding conditional branch).
- PN->addIncoming(OldValPN->getIncomingValue(i),
- OldValPN->getIncomingBlock(i));
- } else {
- // Add an incoming value for each of the new incoming values.
- for (unsigned i = 0, e = BBPreds.size(); i != e; ++i)
- PN->addIncoming(OldVal, BBPreds[i]);
- }
+
+ redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN);
}
}
-
+
if (Succ->getSinglePredecessor()) {
// BB is the only predecessor of Succ, so Succ will end up with exactly
// the same predecessors BB had.
@@ -676,7 +792,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
PN->eraseFromParent();
}
}
-
+
// Everything that jumped to BB now goes to Succ.
BB->replaceAllUsesWith(Succ);
if (!Succ->hasName()) Succ->takeName(BB);
@@ -784,7 +900,7 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
// the final program then it is impossible for us to reliably enforce the
// preferred alignment.
if (GV->isWeakForLinker()) return Align;
-
+
if (GV->getAlignment() >= PrefAlign)
return GV->getAlignment();
// We can only increase the alignment of the global if it has no alignment
@@ -804,26 +920,27 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
/// and it is more than the alignment of the ultimate object, see if we can
/// increase the alignment of the ultimate object, making this check succeed.
unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
- const DataLayout *TD) {
+ const DataLayout *DL) {
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
- unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64;
+
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, KnownZero, KnownOne, TD);
+ ComputeMaskedBits(V, KnownZero, KnownOne, DL);
unsigned TrailZ = KnownZero.countTrailingOnes();
-
- // Avoid trouble with rediculously large TrailZ values, such as
+
+ // Avoid trouble with ridiculously large TrailZ values, such as
// those computed from a null pointer.
TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
-
+
unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
-
+
// LLVM doesn't support alignments larger than this currently.
Align = std::min(Align, +Value::MaximumAlignment);
-
+
if (PrefAlign > Align)
- Align = enforceKnownAlignment(V, Align, PrefAlign, TD);
-
+ Align = enforceKnownAlignment(V, Align, PrefAlign, DL);
+
// We don't need to make any adjustment.
return Align;
}
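The arithmetic here is terse: a pointer whose low TrailZ bits are known to be zero is at least 1 << TrailZ bytes aligned, with the shift clamped so it stays defined. A standalone restatement of that computation, mirroring the clamps the function applies (illustrative only):

    #include <algorithm>
    #include <climits>

    static unsigned alignFromKnownZeros(unsigned TrailZ, unsigned BitWidth) {
      // Clamp ridiculously large counts, e.g. those computed for a null pointer.
      TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
      return 1u << std::min(BitWidth - 1, TrailZ);
    }

For example, alignFromKnownZeros(4, 64) yields 16: four known-zero low bits imply at least 16-byte alignment.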
@@ -854,7 +971,9 @@ static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
StoreInst *SI, DIBuilder &Builder) {
DIVariable DIVar(DDI->getVariable());
- if (!DIVar.Verify())
+ assert((!DIVar || DIVar.isVariable()) &&
+ "Variable in DbgDeclareInst should be either null or a DIVariable.");
+ if (!DIVar)
return false;
if (LdStHasDebugValue(DIVar, SI))
@@ -888,16 +1007,18 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
LoadInst *LI, DIBuilder &Builder) {
DIVariable DIVar(DDI->getVariable());
- if (!DIVar.Verify())
+ assert((!DIVar || DIVar.isVariable()) &&
+ "Variable in DbgDeclareInst should be either null or a DIVariable.");
+ if (!DIVar)
return false;
if (LdStHasDebugValue(DIVar, LI))
return true;
- Instruction *DbgVal =
+ Instruction *DbgVal =
Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
DIVar, LI);
-
+
// Propagate any debug metadata from the store onto the dbg.value.
DebugLoc LIDL = LI->getDebugLoc();
if (!LIDL.isUnknown())
@@ -921,10 +1042,14 @@ bool llvm::LowerDbgDeclare(Function &F) {
if (Dbgs.empty())
return false;
- for (SmallVector<DbgDeclareInst *, 4>::iterator I = Dbgs.begin(),
+ for (SmallVectorImpl<DbgDeclareInst *>::iterator I = Dbgs.begin(),
E = Dbgs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
- if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+ AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
+ // If this is an alloca for a scalar variable, insert a dbg.value
+ // at each load and store to the alloca and erase the dbg.declare.
+ if (AI && !AI->isArrayAllocation()) {
+
// We only remove the dbg.declare intrinsic if all uses are
// converted to dbg.value intrinsics.
bool RemoveDDI = true;
@@ -961,7 +1086,9 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
if (!DDI)
return false;
DIVariable DIVar(DDI->getVariable());
- if (!DIVar.Verify())
+ assert((!DIVar || DIVar.isVariable()) &&
+ "Variable in DbgDeclareInst should be either null or a DIVariable.");
+ if (!DIVar)
return false;
// Create a copy of the original DIDescriptor for user variable, appending
@@ -990,33 +1117,153 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
return true;
}
-bool llvm::removeUnreachableBlocks(Function &F) {
- SmallPtrSet<BasicBlock*, 16> Reachable;
+/// changeToUnreachable - Insert an unreachable instruction before the specified
+/// instruction, making it and the rest of the code in the block dead.
+static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+ BasicBlock *BB = I->getParent();
+ // Loop over all of the successors, removing BB's entry from any PHI
+ // nodes.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ (*SI)->removePredecessor(BB);
+
+ // Insert a call to llvm.trap right before this. This turns the undefined
+ // behavior into a hard fail instead of falling through into random code.
+ if (UseLLVMTrap) {
+ Function *TrapFn =
+ Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
+ CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
+ CallTrap->setDebugLoc(I->getDebugLoc());
+ }
+ new UnreachableInst(I->getContext(), I);
+
+ // All instructions after this are dead.
+ BasicBlock::iterator BBI = I, BBE = BB->end();
+ while (BBI != BBE) {
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BB->getInstList().erase(BBI++);
+ }
+}
+
+/// changeToCall - Convert the specified invoke into a normal call.
+static void changeToCall(InvokeInst *II) {
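+ // An invoke's last three operands are its normal destination, unwind
+ // destination and callee, so op_end() - 3 recovers just the call arguments.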
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Follow the call by a branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Update PHI nodes in the unwind destination
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ II->eraseFromParent();
+}
+
+static bool markAliveBlocks(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 128> &Reachable) {
+
SmallVector<BasicBlock*, 128> Worklist;
- Worklist.push_back(&F.getEntryBlock());
- Reachable.insert(&F.getEntryBlock());
+ Worklist.push_back(BB);
+ Reachable.insert(BB);
+ bool Changed = false;
do {
- BasicBlock *BB = Worklist.pop_back_val();
+ BB = Worklist.pop_back_val();
+
+ // Do a quick scan of the basic block, turning any obviously unreachable
+ // instructions into LLVM unreachable insts. The instruction combining pass
+ // canonicalizes unreachable insts into stores to null or undef.
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
+ if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
+ if (CI->doesNotReturn()) {
+ // If we found a call to a no-return function, insert an unreachable
+ // instruction after it. Make sure there isn't *already* one there
+ // though.
+ ++BBI;
+ if (!isa<UnreachableInst>(BBI)) {
+ // Don't insert a call to llvm.trap right before the unreachable.
+ changeToUnreachable(BBI, false);
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ // Store to undef and store to null are undefined and used to signal that
+ // they should be changed to unreachable by passes that can't modify the
+ // CFG.
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ // Don't touch volatile stores.
+ if (SI->isVolatile()) continue;
+
+ Value *Ptr = SI->getOperand(1);
+
+ if (isa<UndefValue>(Ptr) ||
+ (isa<ConstantPointerNull>(Ptr) &&
+ SI->getPointerAddressSpace() == 0)) {
+ changeToUnreachable(SI, true);
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ // Turn invokes that call 'nounwind' functions into ordinary calls.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Value *Callee = II->getCalledValue();
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ changeToUnreachable(II, true);
+ Changed = true;
+ } else if (II->doesNotThrow()) {
+ if (II->use_empty() && II->onlyReadsMemory()) {
+ // The invoke's result is unused and it only reads memory, so
+ // just jump to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ II->eraseFromParent();
+ } else
+ changeToCall(II);
+ Changed = true;
+ }
+ }
+
+ Changed |= ConstantFoldTerminator(BB, true);
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
if (Reachable.insert(*SI))
Worklist.push_back(*SI);
} while (!Worklist.empty());
+ return Changed;
+}
+
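Stripped of the canonicalizations, markAliveBlocks is an ordinary iterative reachability walk. A minimal sketch of just the traversal, with plain STL containers standing in for SmallVector and SmallPtrSet:

    #include <set>
    #include <vector>

    struct Node { std::vector<Node *> Succs; };

    static std::set<Node *> reachableFrom(Node *Entry) {
      std::set<Node *> Reached;
      std::vector<Node *> Worklist(1, Entry);
      Reached.insert(Entry);
      while (!Worklist.empty()) {
        Node *N = Worklist.back();
        Worklist.pop_back();
        for (size_t i = 0, e = N->Succs.size(); i != e; ++i)
          if (Reached.insert(N->Succs[i]).second) // first visit only
            Worklist.push_back(N->Succs[i]);
      }
      return Reached;
    }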
+/// removeUnreachableBlocks - Remove blocks that are not reachable, even
+/// if they are in a dead cycle. Return true if a change was made, false
+/// otherwise.
+bool llvm::removeUnreachableBlocks(Function &F) {
+ SmallPtrSet<BasicBlock*, 128> Reachable;
+ bool Changed = markAliveBlocks(F.begin(), Reachable);
+ // If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
- return false;
+ return Changed;
assert(Reachable.size() < F.size());
- for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ++I) {
- if (Reachable.count(I))
+ NumRemoved += F.size()-Reachable.size();
+
+ // Loop over all of the basic blocks that are not reachable, dropping all of
+ // their internal references...
+ for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Reachable.count(BB))
continue;
- for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI)
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
if (Reachable.count(*SI))
- (*SI)->removePredecessor(I);
- I->dropAllReferences();
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
}
- for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;)
+ for (Function::iterator I = ++F.begin(); I != F.end();)
if (!Reachable.count(I))
I = F.getBasicBlockList().erase(I);
else
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 37819cc..6d5f16c 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -59,6 +59,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
@@ -100,16 +101,16 @@ namespace {
private:
bool ProcessLoop(Loop *L, LPPassManager &LPM);
BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
- BasicBlock *InsertPreheaderForLoop(Loop *L);
Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
BasicBlock *Preheader);
BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
- void PlaceSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock*> &SplitPreds,
- Loop *L);
};
}
+static void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L);
+
char LoopSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
"Canonicalize natural loops", true, false)
@@ -208,7 +209,7 @@ ReprocessLoop:
// Does the loop already have a preheader? If so, don't insert one.
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
- Preheader = InsertPreheaderForLoop(L);
+ Preheader = InsertPreheaderForLoop(L, this);
if (Preheader) {
++NumInserted;
Changed = true;
@@ -367,7 +368,7 @@ ReprocessLoop:
/// preheader, this method is called to insert one. This method has two phases:
/// preheader insertion and analysis updating.
///
-BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
+BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
BasicBlock *Header = L->getHeader();
// Compute the set of predecessors of the loop that are not in the loop.
@@ -390,11 +391,11 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
BasicBlock *PreheaderBB;
if (!Header->isLandingPad()) {
PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
- this);
+ PP);
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
- ".split-lp", this, NewBBs);
+ ".split-lp", PP, NewBBs);
PreheaderBB = NewBBs[0];
}
@@ -491,9 +492,9 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
// PlaceSplitBlockCarefully - If the block isn't already, move the new block to
// right after some 'outside block' block. This prevents the preheader from
// being placed inside the loop body, e.g. when the loop hasn't been rotated.
-void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock*> &SplitPreds,
- Loop *L) {
+void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L) {
// Check to see if NewBB is already well placed.
Function::iterator BBI = NewBB; --BBI;
for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index cb581b3..162807d 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -90,7 +90,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
// Move all definitions in the successor to the predecessor...
OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
- std::string OldName = BB->getName();
+ // OldName will be valid until erased.
+ StringRef OldName = BB->getName();
// Erase basic block from the function...
@@ -102,12 +103,13 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
}
}
LI->removeBlock(BB);
- BB->eraseFromParent();
// Inherit predecessor's name if it exists...
if (!OldName.empty() && !OnlyPred->hasName())
OnlyPred->setName(OldName);
+ BB->eraseFromParent();
+
return OnlyPred;
}
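The reordering above is the substantive fix: getName() returns a StringRef that points into the block's own name storage, so the name must be copied out (via setName) before eraseFromParent() destroys that storage. A hedged sketch of the safe ordering, assuming BB's single predecessor Pred should inherit the name:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/BasicBlock.h"

    static void inheritNameThenErase(llvm::BasicBlock *BB,
                                     llvm::BasicBlock *Pred) {
      llvm::StringRef OldName = BB->getName(); // a view into BB's storage
      if (!OldName.empty() && !Pred->hasName())
        Pred->setName(OldName);                // copies the bytes out
      BB->eraseFromParent();                   // only now may BB be destroyed
    }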
@@ -239,8 +241,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
DEBUG(dbgs() << "!\n");
}
- std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
-
bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index 4aee8ff..e017f50 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -29,7 +29,7 @@
using namespace llvm;
-STATISTIC(IfHandled, "Number of 'expect' intrinsic intructions handled");
+STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
static cl::opt<uint32_t>
LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index 9ec84d7..9799a30 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -61,6 +61,8 @@ static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
namespace {
class LowerInvoke : public FunctionPass {
+ const TargetMachine *TM;
+
// Used for both models.
Constant *AbortFn;
@@ -70,15 +72,12 @@ namespace {
Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
bool useExpensiveEHSupport;
- // We peek in TLI to grab the target's jmp_buf size and alignment
- const TargetLowering *TLI;
-
public:
static char ID; // Pass identification, replacement for typeid
- explicit LowerInvoke(const TargetLowering *tli = NULL,
+ explicit LowerInvoke(const TargetMachine *TM = 0,
bool useExpensiveEHSupport = ExpensiveEHSupport)
- : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
- TLI(tli) {
+ : FunctionPass(ID), TM(TM),
+ useExpensiveEHSupport(useExpensiveEHSupport) {
initializeLowerInvokePass(*PassRegistry::getPassRegistry());
}
bool doInitialization(Module &M);
@@ -108,12 +107,9 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
char &llvm::LowerInvokePassID = LowerInvoke::ID;
// Public Interface To the LowerInvoke pass.
-FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
- return new LowerInvoke(TLI, ExpensiveEHSupport);
-}
-FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
+FunctionPass *llvm::createLowerInvokePass(const TargetMachine *TM,
bool useExpensiveEHSupport) {
- return new LowerInvoke(TLI, useExpensiveEHSupport);
+ return new LowerInvoke(TM, useExpensiveEHSupport || ExpensiveEHSupport);
}
// doInitialization - Make sure that there is a prototype for abort in the
@@ -122,6 +118,7 @@ bool LowerInvoke::doInitialization(Module &M) {
Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
if (useExpensiveEHSupport) {
// Insert a type for the linked list of jump buffers.
+ const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0;
unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
JBSize = JBSize ? JBSize : 200;
Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
@@ -349,7 +346,6 @@ splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
// Scan all of the uses and see if the live range is live across an unwind
// edge. If we find a use live across an invoke edge, create an alloca
// and spill the value.
- std::set<InvokeInst*> InvokesWithStoreInserted;
// Find all of the blocks that this value is live in.
std::set<BasicBlock*> LiveBBs;
@@ -430,6 +426,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an
// alloca because the value needs to be live across invokes.
+ const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0;
unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
AllocaInst *JmpBuf =
new AllocaInst(JBLinkTy, 0, Align,
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 955b853..2d2a8a5 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -66,6 +66,18 @@ namespace {
BasicBlock* OrigBlock, BasicBlock* Default);
unsigned Clusterify(CaseVector& Cases, SwitchInst *SI);
};
+
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator () (const LowerSwitch::CaseRange& C1,
+ const LowerSwitch::CaseRange& C2) {
+
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
}
char LowerSwitch::ID = 0;
@@ -147,7 +159,7 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
Function::iterator FI = OrigBlock;
F->getBasicBlockList().insert(++FI, NewNode);
- ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_ULT,
+ ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
Val, Pivot.Low, "Pivot");
NewNode->getInstList().push_back(Comp);
BranchInst::Create(LBranch, RBranch, Comp, NewNode);
@@ -222,34 +234,40 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
// Clusterify - Transform simple list of Cases into list of CaseRange's
unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
-
- IntegersSubsetToBB TheClusterifier;
+ unsigned numCmps = 0;
// Start with "simple" cases
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
- BasicBlock *SuccBB = i.getCaseSuccessor();
- IntegersSubset CaseRanges = i.getCaseValueEx();
- TheClusterifier.add(CaseRanges, SuccBB);
- }
-
- TheClusterifier.optimize();
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
+ Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(),
+ i.getCaseSuccessor()));
- size_t numCmps = 0;
- for (IntegersSubsetToBB::RangeIterator i = TheClusterifier.begin(),
- e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
- IntegersSubsetToBB::Cluster &C = *i;
-
- // FIXME: Currently work with ConstantInt based numbers.
- // Changing it to APInt based is a pretty heavy for this commit.
- Cases.push_back(CaseRange(C.first.getLow().toConstantInt(),
- C.first.getHigh().toConstantInt(), C.second));
- if (C.first.isSingleNumber())
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+ // Merge case into clusters
+ if (Cases.size()>=2)
+ for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) {
+ int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
+ int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+ BasicBlock* nextBB = J->BB;
+ BasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue-currentValue==1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
// A range counts double, since it requires two compares.
++numCmps;
}
- return numCmps;
+ return numCmps;
}
// processSwitchInst - Replace the specified switch instruction with a sequence
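Two details are easy to miss above: the pivot comparison switches from ICMP_ULT to ICMP_SLT so that it agrees with CaseCmp, which orders case values by signed slt; and the clustering reduces to sorting by low bound, then fusing numerically adjacent neighbours that branch to the same destination. A self-contained sketch of that merge on plain integers (illustrative types; the pass itself works on ConstantInts):

    #include <algorithm>
    #include <vector>

    struct Case { long Low, High; int Dest; };
    static bool byLow(const Case &A, const Case &B) { return A.Low < B.Low; }

    static void clusterify(std::vector<Case> &Cases) {
      std::sort(Cases.begin(), Cases.end(), byLow);
      for (size_t I = 0; I + 1 < Cases.size();) {
        if (Cases[I + 1].Low - Cases[I].High == 1 &&
            Cases[I].Dest == Cases[I + 1].Dest) {
          Cases[I].High = Cases[I + 1].High;    // grow the current range
          Cases.erase(Cases.begin() + (I + 1)); // drop the merged neighbour
        } else {
          ++I;
        }
      }
    }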
diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 3716f58..c3704531 100644
--- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -53,7 +53,7 @@ namespace {
}
bool runOnModule(Module &M) {
- static const char *metaNames[] = {
+ static const char *const metaNames[] = {
// See http://en.wikipedia.org/wiki/Metasyntactic_variable
"foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
"wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index d090b48..ff6e6f9 100644
--- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -62,3 +63,20 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
appendToGlobalArray("llvm.global_dtors", M, F, Priority);
}
+
+GlobalVariable *
+llvm::collectUsedGlobalVariables(Module &M, SmallPtrSet<GlobalValue *, 8> &Set,
+ bool CompilerUsed) {
+ const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used";
+ GlobalVariable *GV = M.getGlobalVariable(Name);
+ if (!GV || !GV->hasInitializer())
+ return GV;
+
+ const ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+ for (unsigned I = 0, E = Init->getNumOperands(); I != E; ++I) {
+ Value *Op = Init->getOperand(I);
+ GlobalValue *G = cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases());
+ Set.insert(G);
+ }
+ return GV;
+}
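A hedged usage sketch of the new helper: gather every global pinned by @llvm.used and test membership. The wrapper name isPinnedByUsed is illustrative, not part of the patch:

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/IR/GlobalValue.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/ModuleUtils.h"

    static bool isPinnedByUsed(llvm::Module &M, llvm::GlobalValue *GV) {
      llvm::SmallPtrSet<llvm::GlobalValue *, 8> Used;
      llvm::collectUsedGlobalVariables(M, Used, /*CompilerUsed=*/false);
      return Used.count(GV);
    }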
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index de335ec..8f6eee3 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -27,8 +27,8 @@
#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -56,36 +56,13 @@ STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
-namespace llvm {
-template<>
-struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
- typedef std::pair<BasicBlock*, unsigned> EltTy;
- static inline EltTy getEmptyKey() {
- return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
- }
- static inline EltTy getTombstoneKey() {
- return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
- }
- static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
- using llvm::hash_value;
- return static_cast<unsigned>(hash_value(Val));
- }
- static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
- return LHS == RHS;
- }
-};
-}
-
-/// isAllocaPromotable - Return true if this alloca is legal for promotion.
-/// This is true if there are only loads and stores to the alloca.
-///
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
// FIXME: If the memory unit is of pointer or integer type, we can permit
// assignments to subsections of the memory unit.
// Only allow direct and non-volatile loads and stores...
for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
- UI != UE; ++UI) { // Loop over all of the uses of the alloca
+ UI != UE; ++UI) { // Loop over all of the uses of the alloca
const User *U = *UI;
if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
// Note that atomic loads can be transformed; atomic semantics do
@@ -94,7 +71,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
return false;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getOperand(0) == AI)
- return false; // Don't allow a store OF the AI, only INTO the AI.
+ return false; // Don't allow a store OF the AI, only INTO the AI.
// Note that atomic stores can be transformed; atomic semantics do
// not have any meaning for a local alloca.
if (SI->isVolatile())
@@ -124,243 +101,217 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
}
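For context, the classic driver built on isAllocaPromotable looks roughly like the sketch below; it assumes a DominatorTree has already been computed, the 3.4-era header layout, and the ArrayRef-taking PromoteMemToReg introduced alongside this change:

    #include "llvm/Analysis/Dominators.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/PromoteMemToReg.h"
    #include <vector>

    static void promoteEntryAllocas(llvm::Function &F, llvm::DominatorTree &DT) {
      std::vector<llvm::AllocaInst *> Allocas;
      llvm::BasicBlock &Entry = F.getEntryBlock();
      for (llvm::BasicBlock::iterator I = Entry.begin(), E = Entry.end();
           I != E; ++I)
        if (llvm::AllocaInst *AI = llvm::dyn_cast<llvm::AllocaInst>(I))
          if (llvm::isAllocaPromotable(AI))
            Allocas.push_back(AI);
      if (!Allocas.empty())
        llvm::PromoteMemToReg(Allocas, DT); // rewrites the allocas into SSA
    }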
namespace {
- struct AllocaInfo;
-
- // Data package used by RenamePass()
- class RenamePassData {
- public:
- typedef std::vector<Value *> ValVector;
-
- RenamePassData() : BB(NULL), Pred(NULL), Values() {}
- RenamePassData(BasicBlock *B, BasicBlock *P,
- const ValVector &V) : BB(B), Pred(P), Values(V) {}
- BasicBlock *BB;
- BasicBlock *Pred;
- ValVector Values;
-
- void swap(RenamePassData &RHS) {
- std::swap(BB, RHS.BB);
- std::swap(Pred, RHS.Pred);
- Values.swap(RHS.Values);
+
+struct AllocaInfo {
+ SmallVector<BasicBlock *, 32> DefiningBlocks;
+ SmallVector<BasicBlock *, 32> UsingBlocks;
+
+ StoreInst *OnlyStore;
+ BasicBlock *OnlyBlock;
+ bool OnlyUsedInOneBlock;
+
+ Value *AllocaPointerVal;
+ DbgDeclareInst *DbgDeclare;
+
+ void clear() {
+ DefiningBlocks.clear();
+ UsingBlocks.clear();
+ OnlyStore = 0;
+ OnlyBlock = 0;
+ OnlyUsedInOneBlock = true;
+ AllocaPointerVal = 0;
+ DbgDeclare = 0;
+ }
+
+ /// Scan the uses of the specified alloca, filling in the AllocaInfo used
+ /// by the rest of the pass to reason about the uses of this alloca.
+ void AnalyzeAlloca(AllocaInst *AI) {
+ clear();
+
+ // As we scan the uses of the alloca instruction, keep track of stores,
+ // and decide whether all of the loads and stores to the alloca are within
+ // the same basic block.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Remember the basic blocks which define new values for the alloca
+ DefiningBlocks.push_back(SI->getParent());
+ AllocaPointerVal = SI->getOperand(0);
+ OnlyStore = SI;
+ } else {
+ LoadInst *LI = cast<LoadInst>(User);
+ // Otherwise it must be a load instruction, keep track of variable
+ // reads.
+ UsingBlocks.push_back(LI->getParent());
+ AllocaPointerVal = LI;
+ }
+
+ if (OnlyUsedInOneBlock) {
+ if (OnlyBlock == 0)
+ OnlyBlock = User->getParent();
+ else if (OnlyBlock != User->getParent())
+ OnlyUsedInOneBlock = false;
+ }
}
- };
-
- /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
- /// load/store instructions in the block that directly load or store an alloca.
+
+ DbgDeclare = FindAllocaDbgDeclare(AI);
+ }
+};
+
+// Data package used by RenamePass()
+class RenamePassData {
+public:
+ typedef std::vector<Value *> ValVector;
+
+ RenamePassData() : BB(NULL), Pred(NULL), Values() {}
+ RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
+ : BB(B), Pred(P), Values(V) {}
+ BasicBlock *BB;
+ BasicBlock *Pred;
+ ValVector Values;
+
+ void swap(RenamePassData &RHS) {
+ std::swap(BB, RHS.BB);
+ std::swap(Pred, RHS.Pred);
+ Values.swap(RHS.Values);
+ }
+};
+
+/// \brief This assigns and keeps a per-bb relative ordering of load/store
+/// instructions in the block that directly load or store an alloca.
+///
+/// This functionality is important because it avoids scanning large basic
+/// blocks multiple times when promoting many allocas in the same block.
+class LargeBlockInfo {
+ /// \brief For each instruction that we track, keep the index of the
+ /// instruction.
///
- /// This functionality is important because it avoids scanning large basic
- /// blocks multiple times when promoting many allocas in the same block.
- class LargeBlockInfo {
- /// InstNumbers - For each instruction that we track, keep the index of the
- /// instruction. The index starts out as the number of the instruction from
- /// the start of the block.
- DenseMap<const Instruction *, unsigned> InstNumbers;
- public:
-
- /// isInterestingInstruction - This code only looks at accesses to allocas.
- static bool isInterestingInstruction(const Instruction *I) {
- return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
- (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
- }
-
- /// getInstructionIndex - Get or calculate the index of the specified
- /// instruction.
- unsigned getInstructionIndex(const Instruction *I) {
- assert(isInterestingInstruction(I) &&
- "Not a load/store to/from an alloca?");
-
- // If we already have this instruction number, return it.
- DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
- if (It != InstNumbers.end()) return It->second;
-
- // Scan the whole block to get the instruction. This accumulates
- // information for every interesting instruction in the block, in order to
- // avoid gratuitus rescans.
- const BasicBlock *BB = I->getParent();
- unsigned InstNo = 0;
- for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
- BBI != E; ++BBI)
- if (isInterestingInstruction(BBI))
- InstNumbers[BBI] = InstNo++;
- It = InstNumbers.find(I);
-
- assert(It != InstNumbers.end() && "Didn't insert instruction?");
+ /// The index starts out as the number of the instruction from the start of
+ /// the block.
+ DenseMap<const Instruction *, unsigned> InstNumbers;
+
+public:
+
+ /// This code only looks at accesses to allocas.
+ static bool isInterestingInstruction(const Instruction *I) {
+ return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
+ (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
+ }
+
+ /// Get or calculate the index of the specified instruction.
+ unsigned getInstructionIndex(const Instruction *I) {
+ assert(isInterestingInstruction(I) &&
+ "Not a load/store to/from an alloca?");
+
+ // If we already have this instruction number, return it.
+ DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
+ if (It != InstNumbers.end())
return It->second;
- }
-
- void deleteValue(const Instruction *I) {
- InstNumbers.erase(I);
- }
-
- void clear() {
- InstNumbers.clear();
- }
- };
-
- struct PromoteMem2Reg {
- /// Allocas - The alloca instructions being promoted.
- ///
- std::vector<AllocaInst*> Allocas;
- DominatorTree &DT;
- DIBuilder *DIB;
-
- /// AST - An AliasSetTracker object to update. If null, don't update it.
- ///
- AliasSetTracker *AST;
-
- /// AllocaLookup - Reverse mapping of Allocas.
- ///
- DenseMap<AllocaInst*, unsigned> AllocaLookup;
-
- /// NewPhiNodes - The PhiNodes we're adding. That map is used to simplify
- /// some Phi nodes as we iterate over it, so it should have deterministic
- /// iterators. We could use a MapVector, but since we already maintain a
- /// map from BasicBlock* to a stable numbering (BBNumbers), the DenseMap is
- /// more efficient (also supports removal).
- ///
- DenseMap<std::pair<unsigned, unsigned>, PHINode*> NewPhiNodes;
-
- /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
- /// it corresponds to.
- DenseMap<PHINode*, unsigned> PhiToAllocaMap;
-
- /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
- /// each alloca that is of pointer type, we keep track of what to copyValue
- /// to the inserted PHI nodes here.
- ///
- std::vector<Value*> PointerAllocaValues;
-
- /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare
- /// intrinsic that describes it, if any, so that we can convert it to a
- /// dbg.value intrinsic if the alloca gets promoted.
- SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares;
-
- /// Visited - The set of basic blocks the renamer has already visited.
- ///
- SmallPtrSet<BasicBlock*, 16> Visited;
-
- /// BBNumbers - Contains a stable numbering of basic blocks to avoid
- /// non-determinstic behavior.
- DenseMap<BasicBlock*, unsigned> BBNumbers;
-
- /// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
- DenseMap<DomTreeNode*, unsigned> DomLevels;
-
- /// BBNumPreds - Lazily compute the number of predecessors a block has.
- DenseMap<const BasicBlock*, unsigned> BBNumPreds;
- public:
- PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
- AliasSetTracker *ast)
- : Allocas(A), DT(dt), DIB(0), AST(ast) {}
- ~PromoteMem2Reg() {
- delete DIB;
- }
- void run();
+ // Scan the whole block to get the instruction. This accumulates
+ // information for every interesting instruction in the block, in order to
+ // avoid gratuitous rescans.
+ const BasicBlock *BB = I->getParent();
+ unsigned InstNo = 0;
+ for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E;
+ ++BBI)
+ if (isInterestingInstruction(BBI))
+ InstNumbers[BBI] = InstNo++;
+ It = InstNumbers.find(I);
+
+ assert(It != InstNumbers.end() && "Didn't insert instruction?");
+ return It->second;
+ }
- /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
- ///
- bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
- return DT.dominates(BB1, BB2);
- }
+ void deleteValue(const Instruction *I) { InstNumbers.erase(I); }
- private:
- void RemoveFromAllocasList(unsigned &AllocaIdx) {
- Allocas[AllocaIdx] = Allocas.back();
- Allocas.pop_back();
- --AllocaIdx;
- }
+ void clear() { InstNumbers.clear(); }
+};
- unsigned getNumPreds(const BasicBlock *BB) {
- unsigned &NP = BBNumPreds[BB];
- if (NP == 0)
- NP = std::distance(pred_begin(BB), pred_end(BB))+1;
- return NP-1;
- }
+struct PromoteMem2Reg {
+ /// The alloca instructions being promoted.
+ std::vector<AllocaInst *> Allocas;
+ DominatorTree &DT;
+ DIBuilder DIB;
- void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
- AllocaInfo &Info);
- void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
- const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
- SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);
-
- void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
- LargeBlockInfo &LBI);
- void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
- LargeBlockInfo &LBI);
-
- void RenamePass(BasicBlock *BB, BasicBlock *Pred,
- RenamePassData::ValVector &IncVals,
- std::vector<RenamePassData> &Worklist);
- bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
- };
-
- struct AllocaInfo {
- SmallVector<BasicBlock*, 32> DefiningBlocks;
- SmallVector<BasicBlock*, 32> UsingBlocks;
-
- StoreInst *OnlyStore;
- BasicBlock *OnlyBlock;
- bool OnlyUsedInOneBlock;
-
- Value *AllocaPointerVal;
- DbgDeclareInst *DbgDeclare;
-
- void clear() {
- DefiningBlocks.clear();
- UsingBlocks.clear();
- OnlyStore = 0;
- OnlyBlock = 0;
- OnlyUsedInOneBlock = true;
- AllocaPointerVal = 0;
- DbgDeclare = 0;
- }
-
- /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
- /// ivars.
- void AnalyzeAlloca(AllocaInst *AI) {
- clear();
-
- // As we scan the uses of the alloca instruction, keep track of stores,
- // and decide whether all of the loads and stores to the alloca are within
- // the same basic block.
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E;) {
- Instruction *User = cast<Instruction>(*UI++);
-
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- // Remember the basic blocks which define new values for the alloca
- DefiningBlocks.push_back(SI->getParent());
- AllocaPointerVal = SI->getOperand(0);
- OnlyStore = SI;
- } else {
- LoadInst *LI = cast<LoadInst>(User);
- // Otherwise it must be a load instruction, keep track of variable
- // reads.
- UsingBlocks.push_back(LI->getParent());
- AllocaPointerVal = LI;
- }
-
- if (OnlyUsedInOneBlock) {
- if (OnlyBlock == 0)
- OnlyBlock = User->getParent();
- else if (OnlyBlock != User->getParent())
- OnlyUsedInOneBlock = false;
- }
- }
-
- DbgDeclare = FindAllocaDbgDeclare(AI);
- }
- };
+ /// An AliasSetTracker object to update. If null, don't update it.
+ AliasSetTracker *AST;
- typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;
+ /// Reverse mapping of Allocas.
+ DenseMap<AllocaInst *, unsigned> AllocaLookup;
- struct DomTreeNodeCompare {
- bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
- return LHS.second < RHS.second;
- }
- };
-} // end of anonymous namespace
+ /// \brief The PhiNodes we're adding.
+ ///
+ /// That map is used to simplify some Phi nodes as we iterate over it, so
+ /// it should have deterministic iterators. We could use a MapVector, but
+ /// since we already maintain a map from BasicBlock* to a stable numbering
+ /// (BBNumbers), the DenseMap is more efficient (also supports removal).
+ DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;
+
+ /// For each PHI node, keep track of which entry in Allocas it corresponds
+ /// to.
+ DenseMap<PHINode *, unsigned> PhiToAllocaMap;
+
+ /// If we are updating an AliasSetTracker, then for each alloca that is of
+ /// pointer type, we keep track of what to copyValue to the inserted PHI
+ /// nodes here.
+ std::vector<Value *> PointerAllocaValues;
+
+ /// For each alloca, we keep track of the dbg.declare intrinsic that
+ /// describes it, if any, so that we can convert it to a dbg.value
+ /// intrinsic if the alloca gets promoted.
+ SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares;
+
+ /// The set of basic blocks the renamer has already visited.
+ ///
+ SmallPtrSet<BasicBlock *, 16> Visited;
+
+ /// Contains a stable numbering of basic blocks to avoid non-deterministic
+ /// behavior.
+ DenseMap<BasicBlock *, unsigned> BBNumbers;
+
+ /// Maps DomTreeNodes to their level in the dominator tree.
+ DenseMap<DomTreeNode *, unsigned> DomLevels;
+
+ /// Lazily compute the number of predecessors a block has.
+ DenseMap<const BasicBlock *, unsigned> BBNumPreds;
+
+public:
+ PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
+ AliasSetTracker *AST)
+ : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
+ DIB(*DT.getRoot()->getParent()->getParent()), AST(AST) {}
+
+ void run();
+
+private:
+ void RemoveFromAllocasList(unsigned &AllocaIdx) {
+ Allocas[AllocaIdx] = Allocas.back();
+ Allocas.pop_back();
+ --AllocaIdx;
+ }
+
+ unsigned getNumPreds(const BasicBlock *BB) {
+ unsigned &NP = BBNumPreds[BB];
+ if (NP == 0)
+ NP = std::distance(pred_begin(BB), pred_end(BB)) + 1;
+ return NP - 1;
+ }
+
+ void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+ AllocaInfo &Info);
+ void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock *, 32> &LiveInBlocks);
+ void RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncVals,
+ std::vector<RenamePassData> &Worklist);
+ bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
+};
+
+} // end of anonymous namespace
static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
// Knowing that this alloca is promotable, we know that it's safe to kill all
@@ -388,10 +339,191 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
}
}
+/// \brief Rewrite as many loads as possible given a single store.
+///
+/// When there is only a single store, we can use the domtree to trivially
+/// replace all of the dominated loads with the stored value. Do so, and return
+/// true if this has successfully promoted the alloca entirely. If this returns
+/// false there were some loads which were not dominated by the single store
+/// and thus must be phi-ed with undef. We fall back to the standard alloca
+/// promotion algorithm in that case.
+static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI,
+ DominatorTree &DT,
+ AliasSetTracker *AST) {
+ StoreInst *OnlyStore = Info.OnlyStore;
+ bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
+ BasicBlock *StoreBB = OnlyStore->getParent();
+ int StoreIndex = -1;
+
+ // Clear out UsingBlocks. We will reconstruct it here if needed.
+ Info.UsingBlocks.clear();
+
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+ Instruction *UserInst = cast<Instruction>(*UI++);
+ if (!isa<LoadInst>(UserInst)) {
+ assert(UserInst == OnlyStore && "Should only have load/stores");
+ continue;
+ }
+ LoadInst *LI = cast<LoadInst>(UserInst);
+
+ // Okay, if we have a load from the alloca, we want to replace it with the
+ // only value stored to the alloca. We can do this if the value is
+ // dominated by the store. If not, we use the rest of the mem2reg machinery
+ // to insert the phi nodes as needed.
+ if (!StoringGlobalVal) { // Non-instructions are always dominated.
+ if (LI->getParent() == StoreBB) {
+ // If we have a use that is in the same block as the store, compare the
+ // indices of the two instructions to see which one came first. If the
+ // load came before the store, we can't handle it.
+ if (StoreIndex == -1)
+ StoreIndex = LBI.getInstructionIndex(OnlyStore);
+
+ if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
+ // Can't handle this load, bail out.
+ Info.UsingBlocks.push_back(StoreBB);
+ continue;
+ }
+
+ } else if (LI->getParent() != StoreBB &&
+ !DT.dominates(StoreBB, LI->getParent())) {
+ // If the load and store are in different blocks, use BB dominance to
+ // check their relationships. If the store doesn't dom the use, bail
+ // out.
+ Info.UsingBlocks.push_back(LI->getParent());
+ continue;
+ }
+ }
+
+ // Otherwise, we *can* safely rewrite this load.
+ Value *ReplVal = OnlyStore->getOperand(0);
+ // If the replacement value is the load, this must occur in unreachable
+ // code.
+ if (ReplVal == LI)
+ ReplVal = UndefValue::get(LI->getType());
+ LI->replaceAllUsesWith(ReplVal);
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+
+ // Finally, after the scan, check to see if the store is all that is left.
+ if (!Info.UsingBlocks.empty())
+ return false; // If not, we'll have to fall back for the remainder.
+
+ // Record debuginfo for the store and remove the declaration's
+ // debuginfo.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ DIBuilder DIB(*AI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
+ DDI->eraseFromParent();
+ LBI.deleteValue(DDI);
+ }
+ // Remove the (now dead) store and alloca.
+ Info.OnlyStore->eraseFromParent();
+ LBI.deleteValue(Info.OnlyStore);
+
+ if (AST)
+ AST->deleteValue(AI);
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+ return true;
+}
+
+/// Many allocas are only used within a single basic block. If this is the
+/// case, avoid traversing the CFG and inserting a lot of potentially useless
+/// PHI nodes by just performing a single linear pass over the basic block
+/// using the Alloca.
+///
+/// If the alloca is read before it is written, the load is rewritten to an
+/// undef value. This is necessary in cases where, due to control flow, the
+/// alloca is potentially undefined on some control flow paths; e.g., code like
+/// this is potentially correct:
+///
+/// for (...) { if (c) { A = undef; undef = B; } }
+///
+/// ... so long as A is not used before undef is set.
+static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
+ LargeBlockInfo &LBI,
+ AliasSetTracker *AST) {
+ // The trickiest case to handle is when we have large blocks. Because of this,
+ // this code is optimized assuming that large blocks happen. This does not
+ // significantly pessimize the small block case. This uses LargeBlockInfo to
+ // make it efficient to get the index of various operations in the block.
+
+ // Walk the use-def list of the alloca, getting the locations of all stores.
+ typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
+ StoresByIndexTy StoresByIndex;
+
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;
+ ++UI)
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
+
+ // Sort the stores by their index, making it efficient to do a lookup with a
+ // binary search.
+ std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());
+
+ // Walk all of the loads from this alloca, replacing them with the nearest
+ // store above them, if any.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI++);
+ if (!LI)
+ continue;
+
+ unsigned LoadIdx = LBI.getInstructionIndex(LI);
+
+ // Find the nearest store that has a lower index than this load.
+ StoresByIndexTy::iterator I =
+ std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
+ std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
+ less_first());
+
+ if (I == StoresByIndex.begin())
+ // If there is no store before this load, the load takes the undef value.
+ LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ else
+ // Otherwise, there was a store before this load; the load takes its value.
+ LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0));
+
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+
+ // Remove the (now dead) stores and alloca.
+ while (!AI->use_empty()) {
+ StoreInst *SI = cast<StoreInst>(AI->use_back());
+ // Record debuginfo for the store before removing it.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ DIBuilder DIB(*AI->getParent()->getParent()->getParent());
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ }
+ SI->eraseFromParent();
+ LBI.deleteValue(SI);
+ }
+
+ if (AST)
+ AST->deleteValue(AI);
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+
+ // The alloca's debuginfo can be removed as well.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ DDI->eraseFromParent();
+ LBI.deleteValue(DDI);
+ }
+
+ ++NumLocalPromoted;
+}
+
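The heart of the single-block case is the "nearest earlier store" lookup: store indices are kept sorted, lower_bound finds the first store at or after the load, and the element just before it (if any) is the store whose value the load takes. The same lookup in isolation (illustrative types; the pass works on real StoreInsts):

    #include <algorithm>
    #include <utility>
    #include <vector>

    typedef std::pair<unsigned, const char *> IdxStore; // (index, stored value)

    // Mirrors llvm::less_first: order by the index only.
    struct LessFirst {
      bool operator()(const IdxStore &A, const IdxStore &B) const {
        return A.first < B.first;
      }
    };

    static const char *nearestStoreBefore(const std::vector<IdxStore> &Stores,
                                          unsigned LoadIdx) {
      // Stores must already be sorted by index, as in the pass.
      std::vector<IdxStore>::const_iterator I = std::lower_bound(
          Stores.begin(), Stores.end(),
          std::make_pair(LoadIdx, static_cast<const char *>(0)), LessFirst());
      if (I == Stores.begin())
        return 0; // no store precedes the load: it reads an undef value
      return (I - 1)->second;
    }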
void PromoteMem2Reg::run() {
Function &F = *DT.getRoot()->getParent();
- if (AST) PointerAllocaValues.resize(Allocas.size());
+ if (AST)
+ PointerAllocaValues.resize(Allocas.size());
AllocaDbgDeclares.resize(Allocas.size());
AllocaInfo Info;
@@ -400,8 +532,7 @@ void PromoteMem2Reg::run() {
for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
AllocaInst *AI = Allocas[AllocaNum];
- assert(isAllocaPromotable(AI) &&
- "Cannot promote non-promotable alloca!");
+ assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
assert(AI->getParent()->getParent() == &F &&
"All allocas should be in the same function, which is same as DF!");
@@ -409,7 +540,8 @@ void PromoteMem2Reg::run() {
if (AI->use_empty()) {
// If there are no uses of the alloca, just delete it now.
- if (AST) AST->deleteValue(AI);
+ if (AST)
+ AST->deleteValue(AI);
AI->eraseFromParent();
// Remove the alloca from the Allocas list, since it has been processed
@@ -417,7 +549,7 @@ void PromoteMem2Reg::run() {
++NumDeadAlloca;
continue;
}
-
+
// Calculate the set of read and write-locations for each alloca. This is
// analogous to finding the 'uses' and 'definitions' of each variable.
Info.AnalyzeAlloca(AI);
@@ -425,75 +557,27 @@ void PromoteMem2Reg::run() {
// If there is only a single store to this value, replace any loads of
// it that are directly dominated by the definition with the value stored.
if (Info.DefiningBlocks.size() == 1) {
- RewriteSingleStoreAlloca(AI, Info, LBI);
-
- // Finally, after the scan, check to see if the store is all that is left.
- if (Info.UsingBlocks.empty()) {
- // Record debuginfo for the store and remove the declaration's
- // debuginfo.
- if (DbgDeclareInst *DDI = Info.DbgDeclare) {
- if (!DIB)
- DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
- ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB);
- DDI->eraseFromParent();
- }
- // Remove the (now dead) store and alloca.
- Info.OnlyStore->eraseFromParent();
- LBI.deleteValue(Info.OnlyStore);
-
- if (AST) AST->deleteValue(AI);
- AI->eraseFromParent();
- LBI.deleteValue(AI);
-
+ if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
-
++NumSingleStore;
continue;
}
}
-
+
// If the alloca is only read and written in one basic block, just perform a
// linear sweep over the block to eliminate it.
if (Info.OnlyUsedInOneBlock) {
- PromoteSingleBlockAlloca(AI, Info, LBI);
-
- // Finally, after the scan, check to see if the stores are all that is
- // left.
- if (Info.UsingBlocks.empty()) {
-
- // Remove the (now dead) stores and alloca.
- while (!AI->use_empty()) {
- StoreInst *SI = cast<StoreInst>(AI->use_back());
- // Record debuginfo for the store before removing it.
- if (DbgDeclareInst *DDI = Info.DbgDeclare) {
- if (!DIB)
- DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
- ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
- }
- SI->eraseFromParent();
- LBI.deleteValue(SI);
- }
-
- if (AST) AST->deleteValue(AI);
- AI->eraseFromParent();
- LBI.deleteValue(AI);
-
- // The alloca has been processed, move on.
- RemoveFromAllocasList(AllocaNum);
-
- // The alloca's debuginfo can be removed as well.
- if (DbgDeclareInst *DDI = Info.DbgDeclare)
- DDI->eraseFromParent();
+ promoteSingleBlockAlloca(AI, Info, LBI, AST);
- ++NumLocalPromoted;
- continue;
- }
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+ continue;
}
// If we haven't computed dominator tree levels, do so now.
if (DomLevels.empty()) {
- SmallVector<DomTreeNode*, 32> Worklist;
+ SmallVector<DomTreeNode *, 32> Worklist;
DomTreeNode *Root = DT.getRootNode();
DomLevels[Root] = 0;
@@ -522,10 +606,11 @@ void PromoteMem2Reg::run() {
// stored into the alloca.
if (AST)
PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
-
+
// Remember the dbg.declare intrinsic describing this alloca, if any.
- if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
-
+ if (Info.DbgDeclare)
+ AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
+
// Keep the reverse mapping of the 'Allocas' array for the rename pass.
AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
@@ -540,8 +625,7 @@ void PromoteMem2Reg::run() {
return; // All of the allocas must have been trivial!
LBI.clear();
-
-
+
// Set the incoming values for the basic block to be null values for all of
// the alloca's. We do this in case there is a load of a value that has not
// been stored yet. In this case, it will get this null value.
@@ -562,7 +646,7 @@ void PromoteMem2Reg::run() {
// RenamePass may add new worklist entries.
RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
} while (!RenamePassWorkList.empty());
-
+
// The renamer uses the Visited set to avoid infinite loops. Clear it now.
Visited.clear();
@@ -575,7 +659,8 @@ void PromoteMem2Reg::run() {
// tree. Just delete the users now.
if (!A->use_empty())
A->replaceAllUsesWith(UndefValue::get(A->getType()));
- if (AST) AST->deleteValue(A);
+ if (AST)
+ AST->deleteValue(A);
A->eraseFromParent();
}
@@ -591,13 +676,15 @@ void PromoteMem2Reg::run() {
bool EliminatedAPHI = true;
while (EliminatedAPHI) {
EliminatedAPHI = false;
-
+
// Iterating over NewPhiNodes is deterministic, so it is safe to try to
// simplify and RAUW them as we go. If it was not, we could add uses to
// the values we replace with in a non deterministic order, thus creating
// non deterministic def->use chains.
- for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I =
- NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
+ for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
+ I = NewPhiNodes.begin(),
+ E = NewPhiNodes.end();
+ I != E;) {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
@@ -613,15 +700,17 @@ void PromoteMem2Reg::run() {
++I;
}
}
-
+
// At this point, the renamer has added entries to PHI nodes for all reachable
// code. Unfortunately, there may be unreachable blocks which the renamer
// hasn't traversed. If this is the case, the PHI nodes may not
// have incoming values for all predecessors. Loop over all PHI nodes we have
// created, inserting undef values if they are missing any incoming values.
//
- for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I =
- NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
+ for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
+ I = NewPhiNodes.begin(),
+ E = NewPhiNodes.end();
+ I != E; ++I) {
// We want to do this once per basic block. As such, only process a block
// when we find the PHI that is the first entry in the block.
PHINode *SomePHI = I->second;
@@ -636,21 +725,20 @@ void PromoteMem2Reg::run() {
continue;
// Get the preds for BB.
- SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
-
+ SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+
// Ok, now we know that all of the PHI nodes are missing entries for some
// basic blocks. Start by sorting the incoming predecessors for efficient
// access.
std::sort(Preds.begin(), Preds.end());
-
+
// Now we loop through all BB's which have entries in SomePHI and remove
// them from the Preds list.
for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
// Do a log(n) search of the Preds list for the entry we want.
- SmallVector<BasicBlock*, 16>::iterator EntIt =
- std::lower_bound(Preds.begin(), Preds.end(),
- SomePHI->getIncomingBlock(i));
- assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
+ SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
+ Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
+ assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
"PHI node has entry for a block which is not a predecessor!");
// Remove the entry
@@ -670,39 +758,41 @@ void PromoteMem2Reg::run() {
SomePHI->addIncoming(UndefVal, Preds[pred]);
}
}
-
+
NewPhiNodes.clear();
}
+/// \brief Determine which blocks the value is live in.
+///
+/// These are blocks which lead to uses. Knowing this allows us to avoid
+/// inserting PHI nodes into blocks which don't lead to uses (thus, the
+/// inserted phi nodes would be dead).
+void PromoteMem2Reg::ComputeLiveInBlocks(
+ AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) {
-/// ComputeLiveInBlocks - Determine which blocks the value is live in. These
-/// are blocks which lead to uses. Knowing this allows us to avoid inserting
-/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
-/// would be dead).
-void PromoteMem2Reg::
-ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
- const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
- SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {
-
// To determine liveness, we must iterate through the predecessors of blocks
// where the def is live. Blocks are added to the worklist if we need to
// check their predecessors. Start with all the using blocks.
- SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
- Info.UsingBlocks.end());
-
+ SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
+ Info.UsingBlocks.end());
+
// If any of the using blocks is also a definition block, check to see if the
// definition occurs before or after the use. If it happens before the use,
// the value isn't really live-in.
for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
BasicBlock *BB = LiveInBlockWorklist[i];
- if (!DefBlocks.count(BB)) continue;
-
+ if (!DefBlocks.count(BB))
+ continue;
+
// Okay, this is a block that both uses and defines the value. If the first
// reference to the alloca is a def (store), then we know it isn't live-in.
- for (BasicBlock::iterator I = BB->begin(); ; ++I) {
+ for (BasicBlock::iterator I = BB->begin();; ++I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (SI->getOperand(1) != AI) continue;
-
+ if (SI->getOperand(1) != AI)
+ continue;
+
// We found a store to the alloca before a load. The alloca is not
// actually live-in here.
LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
@@ -710,73 +800,76 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
--i, --e;
break;
}
-
+
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (LI->getOperand(0) != AI) continue;
-
+ if (LI->getOperand(0) != AI)
+ continue;
+
// Okay, we found a load before a store to the alloca. It is actually
// live into this block.
break;
}
}
}
-
+
// Now that we have a set of blocks where the phi is live-in, recursively add
// their predecessors until we find the full region the value is live.
while (!LiveInBlockWorklist.empty()) {
BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
-
+
// The block really is live in here, insert it into the set. If already in
// the set, then it has already been processed.
if (!LiveInBlocks.insert(BB))
continue;
-
+
// Since the value is live into BB, it is either defined in a predecessor or
    // live into it, too. Add the preds to the worklist unless they are a
// defining block.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *P = *PI;
-
+
// The value is not live into a predecessor if it defines the value.
if (DefBlocks.count(P))
continue;
-
+
// Otherwise it is, add to the worklist.
LiveInBlockWorklist.push_back(P);
}
}
}
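
For readers following the refactor, the loop above is a standard backwards liveness walk. A minimal standalone sketch of the same idea, using plain std containers in place of LLVM's ADTs (Block and computeLiveIn are hypothetical stand-ins, and the pruning pass that drops use-blocks whose first reference is a store is omitted):

    #include <set>
    #include <vector>

    struct Block { std::vector<Block *> Preds; };

    // Seed with the using blocks, then walk predecessors until a def is hit.
    std::set<Block *> computeLiveIn(const std::set<Block *> &DefBlocks,
                                    std::vector<Block *> Worklist) {
      std::set<Block *> LiveIn;
      while (!Worklist.empty()) {
        Block *BB = Worklist.back();
        Worklist.pop_back();
        if (!LiveIn.insert(BB).second)
          continue; // Already processed this block.
        for (std::vector<Block *>::iterator I = BB->Preds.begin(),
                                            E = BB->Preds.end(); I != E; ++I)
          if (!DefBlocks.count(*I)) // A defining block kills liveness here.
            Worklist.push_back(*I);
      }
      return LiveIn;
    }
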
-/// DetermineInsertionPoint - At this point, we're committed to promoting the
-/// alloca using IDF's, and the standard SSA construction algorithm. Determine
-/// which blocks need phi nodes and see if we can optimize out some work by
-/// avoiding insertion of dead phi nodes.
+/// At this point, we're committed to promoting the alloca using IDF's, and the
+/// standard SSA construction algorithm. Determine which blocks need phi nodes
+/// and see if we can optimize out some work by avoiding insertion of dead phi
+/// nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
AllocaInfo &Info) {
// Unique the set of defining blocks for efficient lookup.
- SmallPtrSet<BasicBlock*, 32> DefBlocks;
+ SmallPtrSet<BasicBlock *, 32> DefBlocks;
DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
// Determine which blocks the value is live in. These are blocks which lead
// to uses.
- SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
+ SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
// Use a priority queue keyed on dominator tree level so that inserted nodes
// are handled from the bottom of the dominator tree upwards.
+ typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
- DomTreeNodeCompare> IDFPriorityQueue;
+ less_second> IDFPriorityQueue;
IDFPriorityQueue PQ;
- for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
- E = DefBlocks.end(); I != E; ++I) {
+ for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(),
+ E = DefBlocks.end();
+ I != E; ++I) {
if (DomTreeNode *Node = DT.getNode(*I))
PQ.push(std::make_pair(Node, DomLevels[Node]));
}
- SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
- SmallPtrSet<DomTreeNode*, 32> Visited;
- SmallVector<DomTreeNode*, 32> Worklist;
+ SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
+ SmallPtrSet<DomTreeNode *, 32> Visited;
+ SmallVector<DomTreeNode *, 32> Worklist;
while (!PQ.empty()) {
DomTreeNodePair RootPair = PQ.top();
PQ.pop();
@@ -836,179 +929,22 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}
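
The comparator swap above replaces the hand-rolled DomTreeNodeCompare with less_second, which orders pairs by their second element so the deepest dominator-tree level pops first. A hedged standalone analogue (LessSecond below is a stand-in for LLVM's utility, assumed to live in ADT/STLExtras.h):

    #include <cassert>
    #include <queue>
    #include <utility>
    #include <vector>

    struct LessSecond {
      bool operator()(const std::pair<int, unsigned> &L,
                      const std::pair<int, unsigned> &R) const {
        return L.second < R.second;
      }
    };

    int main() {
      std::priority_queue<std::pair<int, unsigned>,
                          std::vector<std::pair<int, unsigned> >,
                          LessSecond> PQ;
      PQ.push(std::make_pair(101, 0u)); // (node id, dominator tree level)
      PQ.push(std::make_pair(103, 3u));
      PQ.push(std::make_pair(102, 1u));
      assert(PQ.top().second == 3);     // Deepest level pops first.
      return 0;
    }
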
-/// RewriteSingleStoreAlloca - If there is only a single store to this value,
-/// replace any loads of it that are directly dominated by the definition with
-/// the value stored.
-void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
- AllocaInfo &Info,
- LargeBlockInfo &LBI) {
- StoreInst *OnlyStore = Info.OnlyStore;
- bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
- BasicBlock *StoreBB = OnlyStore->getParent();
- int StoreIndex = -1;
-
- // Clear out UsingBlocks. We will reconstruct it here if needed.
- Info.UsingBlocks.clear();
-
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
- Instruction *UserInst = cast<Instruction>(*UI++);
- if (!isa<LoadInst>(UserInst)) {
- assert(UserInst == OnlyStore && "Should only have load/stores");
- continue;
- }
- LoadInst *LI = cast<LoadInst>(UserInst);
-
- // Okay, if we have a load from the alloca, we want to replace it with the
- // only value stored to the alloca. We can do this if the value is
- // dominated by the store. If not, we use the rest of the mem2reg machinery
- // to insert the phi nodes as needed.
- if (!StoringGlobalVal) { // Non-instructions are always dominated.
- if (LI->getParent() == StoreBB) {
- // If we have a use that is in the same block as the store, compare the
- // indices of the two instructions to see which one came first. If the
- // load came before the store, we can't handle it.
- if (StoreIndex == -1)
- StoreIndex = LBI.getInstructionIndex(OnlyStore);
-
- if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
- // Can't handle this load, bail out.
- Info.UsingBlocks.push_back(StoreBB);
- continue;
- }
-
- } else if (LI->getParent() != StoreBB &&
- !dominates(StoreBB, LI->getParent())) {
- // If the load and store are in different blocks, use BB dominance to
- // check their relationships. If the store doesn't dom the use, bail
- // out.
- Info.UsingBlocks.push_back(LI->getParent());
- continue;
- }
- }
-
- // Otherwise, we *can* safely rewrite this load.
- Value *ReplVal = OnlyStore->getOperand(0);
- // If the replacement value is the load, this must occur in unreachable
- // code.
- if (ReplVal == LI)
- ReplVal = UndefValue::get(LI->getType());
- LI->replaceAllUsesWith(ReplVal);
- if (AST && LI->getType()->isPointerTy())
- AST->deleteValue(LI);
- LI->eraseFromParent();
- LBI.deleteValue(LI);
- }
-}
-
-namespace {
-
-/// StoreIndexSearchPredicate - This is a helper predicate used to search by the
-/// first element of a pair.
-struct StoreIndexSearchPredicate {
- bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
- const std::pair<unsigned, StoreInst*> &RHS) {
- return LHS.first < RHS.first;
- }
-};
-
-}
-
-/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
-/// block. If this is the case, avoid traversing the CFG and inserting a lot of
-/// potentially useless PHI nodes by just performing a single linear pass over
-/// the basic block using the Alloca.
-///
-/// If we cannot promote this alloca (because it is read before it is written),
-/// return true. This is necessary in cases where, due to control flow, the
-/// alloca is potentially undefined on some control flow paths. e.g. code like
-/// this is potentially correct:
-///
-/// for (...) { if (c) { A = undef; undef = B; } }
-///
-/// ... so long as A is not used before undef is set.
+/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca.
///
-void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
- LargeBlockInfo &LBI) {
- // The trickiest case to handle is when we have large blocks. Because of this,
- // this code is optimized assuming that large blocks happen. This does not
- // significantly pessimize the small block case. This uses LargeBlockInfo to
- // make it efficient to get the index of various operations in the block.
-
- // Clear out UsingBlocks. We will reconstruct it here if needed.
- Info.UsingBlocks.clear();
-
- // Walk the use-def list of the alloca, getting the locations of all stores.
- typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
- StoresByIndexTy StoresByIndex;
-
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E; ++UI)
- if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
- StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
-
- // If there are no stores to the alloca, just replace any loads with undef.
- if (StoresByIndex.empty()) {
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
- if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
- LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
- if (AST && LI->getType()->isPointerTy())
- AST->deleteValue(LI);
- LBI.deleteValue(LI);
- LI->eraseFromParent();
- }
- return;
- }
-
- // Sort the stores by their index, making it efficient to do a lookup with a
- // binary search.
- std::sort(StoresByIndex.begin(), StoresByIndex.end());
-
- // Walk all of the loads from this alloca, replacing them with the nearest
- // store above them, if any.
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI++);
- if (!LI) continue;
-
- unsigned LoadIdx = LBI.getInstructionIndex(LI);
-
-    // Find the nearest store that has a lower index than this load.
- StoresByIndexTy::iterator I =
- std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
- std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)),
- StoreIndexSearchPredicate());
-
- // If there is no store before this load, then we can't promote this load.
- if (I == StoresByIndex.begin()) {
- // Can't handle this load, bail out.
- Info.UsingBlocks.push_back(LI->getParent());
- continue;
- }
-
- // Otherwise, there was a store before this load, the load takes its value.
- --I;
- LI->replaceAllUsesWith(I->second->getOperand(0));
- if (AST && LI->getType()->isPointerTy())
- AST->deleteValue(LI);
- LI->eraseFromParent();
- LBI.deleteValue(LI);
- }
-}
-
-// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
-// Alloca returns true if there wasn't already a phi-node for that variable
-//
+/// Returns true if there wasn't already a phi-node for that variable.
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
unsigned &Version) {
// Look up the basic-block in question.
PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];
// If the BB already has a phi node added for the i'th alloca then we're done!
- if (PN) return false;
+ if (PN)
+ return false;
// Create a PhiNode using the dereferenced type... and add the phi-node to the
// BasicBlock.
PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
- Allocas[AllocaNo]->getName() + "." + Twine(Version++),
+ Allocas[AllocaNo]->getName() + "." + Twine(Version++),
BB->begin());
++NumPHIInsert;
PhiToAllocaMap[PN] = AllocaNo;
@@ -1019,10 +955,11 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
return true;
}
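
QueuePhiNode's early return relies on DenseMap's operator[] creating the slot and handing back a reference, so the lookup and the insertion are a single probe. A sketch of the same create-once pattern with std::map (Phi and queueOnce are hypothetical):

    #include <map>
    #include <utility>

    struct Phi { unsigned Version; };

    // Returns true only on the first request for a (block, alloca) pair.
    bool queueOnce(std::map<std::pair<unsigned, unsigned>, Phi *> &Table,
                   unsigned BBNum, unsigned AllocaNo, unsigned &Version) {
      Phi *&Slot = Table[std::make_pair(BBNum, AllocaNo)];
      if (Slot)
        return false;            // A phi already exists for this pair.
      Slot = new Phi();
      Slot->Version = Version++; // Mirrors the Name + "." + Twine(Version++).
      return true;
    }
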
-// RenamePass - Recursively traverse the CFG of the function, renaming loads and
-// stores to the allocas which we are promoting. IncomingVals indicates what
-// value each Alloca contains on exit from the predecessor block Pred.
-//
+/// \brief Recursively traverse the CFG of the function, renaming loads and
+/// stores to the allocas which we are promoting.
+///
+/// IncomingVals indicates what value each Alloca contains on exit from the
+/// predecessor block Pred.
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
RenamePassData::ValVector &IncomingVals,
std::vector<RenamePassData> &Worklist) {
@@ -1040,48 +977,49 @@ NextIteration:
// inserted by this pass of mem2reg will have the same number of incoming
// operands so far. Remember this count.
unsigned NewPHINumOperands = APN->getNumOperands();
-
- unsigned NumEdges = 0;
- for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
- if (*I == BB)
- ++NumEdges;
+
+ unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
assert(NumEdges && "Must be at least one edge from Pred to BB!");
-
+
// Add entries for all the phis.
BasicBlock::iterator PNI = BB->begin();
do {
unsigned AllocaNo = PhiToAllocaMap[APN];
-
+
// Add N incoming values to the PHI node.
for (unsigned i = 0; i != NumEdges; ++i)
APN->addIncoming(IncomingVals[AllocaNo], Pred);
-
+
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
-
+
// Get the next phi node.
++PNI;
APN = dyn_cast<PHINode>(PNI);
- if (APN == 0) break;
-
+ if (APN == 0)
+ break;
+
// Verify that it is missing entries. If not, it is not being inserted
// by this mem2reg invocation so we want to ignore it.
} while (APN->getNumOperands() == NewPHINumOperands);
}
}
-
+
// Don't revisit blocks.
- if (!Visited.insert(BB)) return;
+ if (!Visited.insert(BB))
+ return;
- for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
+ for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
Instruction *I = II++; // get the instruction, increment iterator
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
- if (!Src) continue;
-
- DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
- if (AI == AllocaLookup.end()) continue;
+ if (!Src)
+ continue;
+
+ DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
+ if (AI == AllocaLookup.end())
+ continue;
Value *V = IncomingVals[AI->second];
@@ -1094,30 +1032,29 @@ NextIteration:
// Delete this instruction and mark the name as the current holder of the
// value
AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
- if (!Dest) continue;
-
+ if (!Dest)
+ continue;
+
DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
if (ai == AllocaLookup.end())
continue;
-
+
// what value were we writing?
IncomingVals[ai->second] = SI->getOperand(0);
// Record debuginfo for the store before removing it.
- if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) {
- if (!DIB)
- DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
- ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
- }
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
BB->getInstList().erase(SI);
}
}
// 'Recurse' to our successors.
succ_iterator I = succ_begin(BB), E = succ_end(BB);
- if (I == E) return;
+ if (I == E)
+ return;
// Keep track of the successors so we don't visit the same successor twice
- SmallPtrSet<BasicBlock*, 8> VisitedSuccs;
+ SmallPtrSet<BasicBlock *, 8> VisitedSuccs;
// Handle the first successor without using the worklist.
VisitedSuccs.insert(*I);
@@ -1132,18 +1069,11 @@ NextIteration:
goto NextIteration;
}
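
RenamePass avoids deep recursion by handling the first successor via the goto above and pushing the rest onto the worklist. A hedged standalone sketch of that control shape (Node and visitAll are hypothetical; the per-block successor dedup via VisitedSuccs is omitted):

    #include <cstddef>
    #include <vector>

    struct Node {
      std::vector<Node *> Succs;
      bool Visited;
      Node() : Visited(false) {}
    };

    void visitAll(Node *Start) {
      std::vector<Node *> Worklist;
      Worklist.push_back(Start);
      while (!Worklist.empty()) {
        Node *N = Worklist.back();
        Worklist.pop_back();
      NextIteration:
        if (N->Visited)
          continue;              // Don't revisit blocks.
        N->Visited = true;
        if (N->Succs.empty())
          continue;
        // Queue every successor except the first; loop on the first directly.
        for (std::size_t i = 1, e = N->Succs.size(); i != e; ++i)
          Worklist.push_back(N->Succs[i]);
        N = N->Succs.front();
        goto NextIteration;
      }
    }
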
-/// PromoteMemToReg - Promote the specified list of alloca instructions into
-/// scalar registers, inserting PHI nodes as appropriate. This function does
-/// not modify the CFG of the function at all. All allocas must be from the
-/// same function.
-///
-/// If AST is specified, the specified tracker is updated to reflect changes
-/// made to the IR.
-///
-void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
- DominatorTree &DT, AliasSetTracker *AST) {
+void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
+ AliasSetTracker *AST) {
// If there is nothing to do, bail out...
- if (Allocas.empty()) return;
+ if (Allocas.empty())
+ return;
PromoteMem2Reg(Allocas, DT, AST).run();
}
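
The new signature accepts any contiguous list of allocas. A hedged usage sketch, mirroring the driver loop a caller would use (promoteEntryAllocas is hypothetical; isAllocaPromotable is the existing gate declared alongside PromoteMemToReg):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/Dominators.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/PromoteMemToReg.h"
    using namespace llvm;

    void promoteEntryAllocas(Function &F, DominatorTree &DT) {
      SmallVector<AllocaInst *, 16> Allocas;
      BasicBlock &BB = F.getEntryBlock();
      for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
        if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
          if (isAllocaPromotable(AI))
            Allocas.push_back(AI);
      // A SmallVector converts implicitly to ArrayRef; no std::vector copy.
      if (!Allocas.empty())
        PromoteMemToReg(Allocas, DT, /*AST=*/0);
    }
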
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 9d90fbe..30adbfa 100644
--- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -42,8 +42,6 @@ SSAUpdater::~SSAUpdater() {
delete static_cast<AvailableValsTy*>(AV);
}
-/// Initialize - Reset this object to get ready for a new set of SSA
-/// updates with type 'Ty'. PHI nodes get a name based on 'Name'.
void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
if (AV == 0)
AV = new AvailableValsTy();
@@ -53,14 +51,10 @@ void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
ProtoName = Name;
}
-/// HasValueForBlock - Return true if the SSAUpdater already has a value for
-/// the specified block.
bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
return getAvailableVals(AV).count(BB);
}
-/// AddAvailableValue - Indicate that a rewritten value is available in the
-/// specified block with the specified value.
void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
assert(ProtoType != 0 && "Need to initialize SSAUpdater");
assert(ProtoType == V->getType() &&
@@ -68,10 +62,8 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
getAvailableVals(AV)[BB] = V;
}
-/// IsEquivalentPHI - Check if PHI has the same incoming value as specified
-/// in ValueMapping for each predecessor block.
static bool IsEquivalentPHI(PHINode *PHI,
- DenseMap<BasicBlock*, Value*> &ValueMapping) {
+ SmallDenseMap<BasicBlock*, Value*, 8> &ValueMapping) {
unsigned PHINumValues = PHI->getNumIncomingValues();
if (PHINumValues != ValueMapping.size())
return false;
@@ -86,32 +78,11 @@ static bool IsEquivalentPHI(PHINode *PHI,
return true;
}
-/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
-/// live at the end of the specified block.
Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
Value *Res = GetValueAtEndOfBlockInternal(BB);
return Res;
}
-/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
-/// is live in the middle of the specified block.
-///
-/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
-/// important case: if there is a definition of the rewritten value after the
-/// 'use' in BB. Consider code like this:
-///
-/// X1 = ...
-/// SomeBB:
-/// use(X)
-/// X2 = ...
-/// br Cond, SomeBB, OutBB
-///
-/// In this case, there are two values (X1 and X2) added to the AvailableVals
-/// set by the client of the rewriter, and those values are both live out of
-/// their respective blocks. However, the use of X happens in the *middle* of
-/// a block. Because of this, we need to insert a new PHI node in SomeBB to
-/// merge the appropriate values, and this value isn't live out of the block.
-///
Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// If there is no definition of the renamed variable in this block, just use
// GetValueAtEndOfBlock to do our work.
@@ -165,8 +136,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// Otherwise, we do need a PHI: check to see if we already have one available
// in this block that produces the right value.
if (isa<PHINode>(BB->begin())) {
- DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(),
- PredValues.end());
+ SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(),
+ PredValues.end());
PHINode *SomePHI;
for (BasicBlock::iterator It = BB->begin();
(SomePHI = dyn_cast<PHINode>(It)); ++It) {
@@ -203,8 +174,6 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
return InsertedPHI;
}
-/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
-/// which use their value in the corresponding predecessor.
void SSAUpdater::RewriteUse(Use &U) {
Instruction *User = cast<Instruction>(U.getUser());
@@ -222,10 +191,6 @@ void SSAUpdater::RewriteUse(Use &U) {
U.set(V);
}
-/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However,
-/// this version of the method can rewrite uses in the same block as a
-/// definition, because it assumes that all uses of a value are below any
-/// inserted values.
void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
Instruction *User = cast<Instruction>(U.getUser());
@@ -238,8 +203,6 @@ void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
U.set(V);
}
-/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template,
-/// specialized for SSAUpdater.
namespace llvm {
template<>
class SSAUpdaterTraits<SSAUpdater> {
@@ -342,10 +305,9 @@ public:
} // End llvm namespace
-/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
-/// for the specified BB and if so, return it. If not, construct SSA form by
-/// first calculating the required placement of PHIs and then inserting new
-/// PHIs where needed.
+/// Check to see if AvailableVals has an entry for the specified BB and if so,
+/// return it. If not, construct SSA form by first calculating the required
+/// placement of PHIs and then inserting new PHIs where needed.
Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
AvailableValsTy &AvailableVals = getAvailableVals(AV);
if (Value *V = AvailableVals[BB])
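
Even with the doc comments dropped, the SSAUpdater entry points shown in this diff keep their roles. A hedged usage sketch (rewriteOneUse and its parameters are hypothetical; the calls are the APIs named above):

    #include "llvm/Transforms/Utils/SSAUpdater.h"
    using namespace llvm;

    // The caller supplies two reaching definitions and a use to rewrite.
    void rewriteOneUse(Type *Ty, BasicBlock *DefBB1, Value *V1,
                       BasicBlock *DefBB2, Value *V2, Use &U) {
      SSAUpdater SSA;
      SSA.Initialize(Ty, "var");         // One Initialize per variable.
      SSA.AddAvailableValue(DefBB1, V1); // Values live out of their blocks.
      SSA.AddAvailableValue(DefBB2, V2);
      SSA.RewriteUse(U);                 // Inserts PHIs where paths merge.
    }
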
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 052ad85..ff50b12 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -40,12 +41,14 @@
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/NoFolder.h"
+#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <map>
#include <set>
using namespace llvm;
+using namespace PatternMatch;
static cl::opt<unsigned>
PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
@@ -88,7 +91,6 @@ namespace {
class SimplifyCFGOpt {
const TargetTransformInfo &TTI;
const DataLayout *const TD;
-
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
std::vector<ValueEqualityComparisonCase> &Cases);
@@ -194,94 +196,7 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
}
-
-/// GetIfCondition - Given a basic block (BB) with two predecessors (and at
-/// least one PHI node in it), check to see if the merge at this block is due
-/// to an "if condition". If so, return the boolean condition that determines
-/// which entry into BB will be taken. Also, return by references the block
-/// that will be entered from if the condition is true, and the block that will
-/// be entered if the condition is false.
-///
-/// This does no checking to see if the true/false blocks have large or unsavory
-/// instructions in them.
-static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
- BasicBlock *&IfFalse) {
- PHINode *SomePHI = cast<PHINode>(BB->begin());
- assert(SomePHI->getNumIncomingValues() == 2 &&
- "Function can only handle blocks with 2 predecessors!");
- BasicBlock *Pred1 = SomePHI->getIncomingBlock(0);
- BasicBlock *Pred2 = SomePHI->getIncomingBlock(1);
-
- // We can only handle branches. Other control flow will be lowered to
- // branches if possible anyway.
- BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
- BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
- if (Pred1Br == 0 || Pred2Br == 0)
- return 0;
-
- // Eliminate code duplication by ensuring that Pred1Br is conditional if
- // either are.
- if (Pred2Br->isConditional()) {
- // If both branches are conditional, we don't have an "if statement". In
- // reality, we could transform this case, but since the condition will be
- // required anyway, we stand no chance of eliminating it, so the xform is
- // probably not profitable.
- if (Pred1Br->isConditional())
- return 0;
-
- std::swap(Pred1, Pred2);
- std::swap(Pred1Br, Pred2Br);
- }
-
- if (Pred1Br->isConditional()) {
- // The only thing we have to watch out for here is to make sure that Pred2
- // doesn't have incoming edges from other blocks. If it does, the condition
- // doesn't dominate BB.
- if (Pred2->getSinglePredecessor() == 0)
- return 0;
-
- // If we found a conditional branch predecessor, make sure that it branches
- // to BB and Pred2Br. If it doesn't, this isn't an "if statement".
- if (Pred1Br->getSuccessor(0) == BB &&
- Pred1Br->getSuccessor(1) == Pred2) {
- IfTrue = Pred1;
- IfFalse = Pred2;
- } else if (Pred1Br->getSuccessor(0) == Pred2 &&
- Pred1Br->getSuccessor(1) == BB) {
- IfTrue = Pred2;
- IfFalse = Pred1;
- } else {
- // We know that one arm of the conditional goes to BB, so the other must
- // go somewhere unrelated, and this must not be an "if statement".
- return 0;
- }
-
- return Pred1Br->getCondition();
- }
-
- // Ok, if we got here, both predecessors end with an unconditional branch to
- // BB. Don't panic! If both blocks only have a single (identical)
- // predecessor, and THAT is a conditional branch, then we're all ok!
- BasicBlock *CommonPred = Pred1->getSinglePredecessor();
- if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
- return 0;
-
- // Otherwise, if this is a conditional branch, then we can use it!
- BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
- if (BI == 0) return 0;
-
- assert(BI->isConditional() && "Two successors but not conditional?");
- if (BI->getSuccessor(0) == Pred1) {
- IfTrue = Pred1;
- IfFalse = Pred2;
- } else {
- IfTrue = Pred2;
- IfFalse = Pred1;
- }
- return BI->getCondition();
-}
-
-/// ComputeSpeculuationCost - Compute an abstract "cost" of speculating the
+/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the
/// given instruction, which is assumed to be safe to speculate. 1 means
/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive.
static unsigned ComputeSpeculationCost(const User *I) {
@@ -432,7 +347,24 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
// If this is an icmp against a constant, handle this as one of the cases.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
+ Value *RHSVal;
+ ConstantInt *RHSC;
+
if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
+        // (x & ~2^k) == y  -->  x == y || x == (y | 2^k)
+ // This undoes a transformation done by instcombine to fuse 2 compares.
+ if (match(ICI->getOperand(0),
+ m_And(m_Value(RHSVal), m_ConstantInt(RHSC)))) {
+ APInt Not = ~RHSC->getValue();
+ if (Not.isPowerOf2()) {
+ Vals.push_back(C);
+ Vals.push_back(
+ ConstantInt::get(C->getContext(), C->getValue() | Not));
+ UsedICmps++;
+ return RHSVal;
+ }
+ }
+
UsedICmps++;
Vals.push_back(C);
return I->getOperand(0);
@@ -443,6 +375,13 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
ConstantRange Span =
ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
+ // Shift the range if the compare is fed by an add. This is the range
+ // compare idiom as emitted by instcombine.
+ bool hasAdd =
+ match(I->getOperand(0), m_Add(m_Value(RHSVal), m_ConstantInt(RHSC)));
+ if (hasAdd)
+ Span = Span.subtract(RHSC->getValue());
+
// If this is an and/!= check then we want to optimize "x ugt 2" into
// x != 0 && x != 1.
if (!isEQ)
@@ -455,7 +394,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
UsedICmps++;
- return I->getOperand(0);
+ return hasAdd ? RHSVal : I->getOperand(0);
}
return 0;
}
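
The new match above undoes instcombine's fused masked compare: when ~Mask is a single bit, (x & Mask) == C is equivalent to x == C || x == (C | ~Mask), provided the ignored bit of C is clear, as it is in the compares instcombine produces. The companion hunk handles the range idiom by shifting the span back across the feeding add. A brute-force check of the identity (standalone; the constants are illustrative):

    #include <cassert>
    #include <stdint.h>

    int main() {
      // Mask clears a single bit (bit 1), so ~Mask == 2 is a power of two.
      const uint32_t Mask = ~(uint32_t)2;
      const uint32_t C = 8; // Bit 1 of C must be clear for the identity.
      for (uint32_t x = 0; x != 0x10000; ++x) {
        bool Fused = (x & Mask) == C;
        bool Split = (x == C) || (x == (C | ~Mask)); // x == 8 || x == 10
        assert(Fused == Split);
      }
      return 0;
    }
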
@@ -533,15 +472,17 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional() && BI->getCondition()->hasOneUse())
if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
- if ((ICI->getPredicate() == ICmpInst::ICMP_EQ ||
- ICI->getPredicate() == ICmpInst::ICMP_NE) &&
- GetConstantInt(ICI->getOperand(1), TD))
+ if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), TD))
CV = ICI->getOperand(0);
// Unwrap any lossless ptrtoint cast.
- if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext()))
- if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV))
- CV = PTII->getOperand(0);
+ if (TD && CV) {
+ if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
+ Value *Ptr = PTII->getPointerOperand();
+ if (PTII->getType() == TD->getIntPtrType(Ptr->getType()))
+ CV = Ptr;
+ }
+ }
return CV;
}
@@ -763,9 +704,10 @@ namespace {
};
}
-static int ConstantIntSortPredicate(const void *P1, const void *P2) {
- const ConstantInt *LHS = *(const ConstantInt*const*)P1;
- const ConstantInt *RHS = *(const ConstantInt*const*)P2;
+static int ConstantIntSortPredicate(ConstantInt *const *P1,
+ ConstantInt *const *P2) {
+ const ConstantInt *LHS = *P1;
+ const ConstantInt *RHS = *P2;
if (LHS->getValue().ult(RHS->getValue()))
return 1;
if (LHS->getValue() == RHS->getValue())
@@ -988,7 +930,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// Convert pointer to int before we switch.
if (CV->getType()->isPointerTy()) {
assert(TD && "Cannot switch on pointer without DataLayout");
- CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()),
+ CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()),
"magicptr");
}
@@ -1083,9 +1025,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
(isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
return false;
- // If we get here, we can hoist at least one instruction.
BasicBlock *BIParent = BI->getParent();
+ bool Changed = false;
do {
// If we are hoisting the terminator instruction, don't move one (making a
// broken BB), instead clone it, and remove BI.
@@ -1100,6 +1042,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
I2->replaceAllUsesWith(I1);
I1->intersectOptionalDataWith(I2);
I2->eraseFromParent();
+ Changed = true;
I1 = BB1_Itr++;
I2 = BB2_Itr++;
@@ -1119,7 +1062,23 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
HoistTerminator:
// It may not be possible to hoist an invoke.
if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
- return true;
+ return Changed;
+
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V)
+ continue;
+
+ if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
+ return Changed;
+ if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
+ return Changed;
+ }
+ }
// Okay, it is safe to hoist the terminator.
Instruction *NT = I1->clone();
@@ -1362,8 +1321,8 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
///
/// \return The pointer to the value of the previous store if the store can be
/// hoisted into the predecessor block. 0 otherwise.
-Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
- BasicBlock *StoreBB, BasicBlock *EndBB) {
+static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
+ BasicBlock *StoreBB, BasicBlock *EndBB) {
StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
if (!StoreToHoist)
return 0;
@@ -1522,18 +1481,23 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
Value *OrigV = PN->getIncomingValueForBlock(BB);
Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
+ // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
// Skip PHIs which are trivial.
if (ThenV == OrigV)
continue;
HaveRewritablePHIs = true;
- ConstantExpr *CE = dyn_cast<ConstantExpr>(ThenV);
- if (!CE)
+ ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
+ ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
+ if (!OrigCE && !ThenCE)
continue; // Known safe and cheap.
- if (!isSafeToSpeculativelyExecute(CE))
+ if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
+ (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
return false;
- if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold)
+ unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE) : 0;
+ unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE) : 0;
+ if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold)
return false;
// Account for the cost of an unfolded ConstantExpr which could end up
@@ -1598,6 +1562,19 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
return true;
}
+/// \returns True if this block contains a CallInst with the NoDuplicate
+/// attribute.
+static bool HasNoDuplicateCall(const BasicBlock *BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI)
+ continue;
+ if (CI->cannotDuplicate())
+ return true;
+ }
+ return false;
+}
+
/// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch
/// across this block.
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
@@ -1645,6 +1622,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
// Now we know that this block has multiple preds and two succs.
if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
+ if (HasNoDuplicateCall(BB)) return false;
+
// Okay, this is a simple enough basic block. See if any phi values are
// constants.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -2111,14 +2090,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Ensure that any values used in the bonus instruction are also used
// by the terminator of the predecessor. This means that those values
// must already have been resolved, so we won't be inhibiting the
- // out-of-order core by speculating them earlier.
- if (BonusInst) {
+ // out-of-order core by speculating them earlier. We also allow
+ // instructions that are used by the terminator's condition because it
+ // exposes more merging opportunities.
+ bool UsedByBranch = (BonusInst && BonusInst->hasOneUse() &&
+ *BonusInst->use_begin() == Cond);
+
+ if (BonusInst && !UsedByBranch) {
// Collect the values used by the bonus inst
SmallPtrSet<Value*, 4> UsedValues;
for (Instruction::op_iterator OI = BonusInst->op_begin(),
OE = BonusInst->op_end(); OI != OE; ++OI) {
Value *V = *OI;
- if (!isa<Constant>(V))
+ if (!isa<Constant>(V) && !isa<Argument>(V))
UsedValues.insert(V);
}
@@ -2829,7 +2813,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
if (CompVal->getType()->isPointerTy()) {
assert(TD && "Cannot switch on pointer without DataLayout");
CompVal = Builder.CreatePtrToInt(CompVal,
- TD->getIntPtrType(CompVal->getContext()),
+ TD->getIntPtrType(CompVal->getType()),
"magicptr");
}
@@ -3202,7 +3186,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
/// and use it to remove dead cases.
static bool EliminateDeadSwitchCases(SwitchInst *SI) {
Value *Cond = SI->getCondition();
- unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth();
+ unsigned Bits = Cond->getType()->getIntegerBitWidth();
APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
ComputeMaskedBits(Cond, KnownZero, KnownOne);
@@ -3307,7 +3291,7 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(),
E = ForwardingNodes.end(); I != E; ++I) {
PHINode *Phi = I->first;
- SmallVector<int,4> &Indexes = I->second;
+ SmallVectorImpl<int> &Indexes = I->second;
if (Indexes.size() < 2) continue;
@@ -3345,28 +3329,10 @@ static Constant *LookupConstant(Value *V,
/// simple instructions such as binary operations where both operands are
/// constant or can be replaced by constants from the ConstantPool. Returns the
/// resulting constant on success, 0 otherwise.
-static Constant *ConstantFold(Instruction *I,
- const SmallDenseMap<Value*, Constant*>& ConstantPool) {
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- Constant *A = LookupConstant(BO->getOperand(0), ConstantPool);
- if (!A)
- return 0;
- Constant *B = LookupConstant(BO->getOperand(1), ConstantPool);
- if (!B)
- return 0;
- return ConstantExpr::get(BO->getOpcode(), A, B);
- }
-
- if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
- Constant *A = LookupConstant(I->getOperand(0), ConstantPool);
- if (!A)
- return 0;
- Constant *B = LookupConstant(I->getOperand(1), ConstantPool);
- if (!B)
- return 0;
- return ConstantExpr::getCompare(Cmp->getPredicate(), A, B);
- }
-
+static Constant *
+ConstantFold(Instruction *I,
+ const SmallDenseMap<Value *, Constant *> &ConstantPool,
+ const DataLayout *DL) {
if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
if (!A)
@@ -3378,25 +3344,32 @@ static Constant *ConstantFold(Instruction *I,
return 0;
}
- if (CastInst *Cast = dyn_cast<CastInst>(I)) {
- Constant *A = LookupConstant(I->getOperand(0), ConstantPool);
- if (!A)
+ SmallVector<Constant *, 4> COps;
+ for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
+ if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
+ COps.push_back(A);
+ else
return 0;
- return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy());
}
- return 0;
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
+ COps[1], DL);
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL);
}
/// GetCaseResults - Try to determine the resulting constant values in phi nodes
/// at the common destination basic block, *CommonDest, for one of the case
/// destinations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
-static bool GetCaseResults(SwitchInst *SI,
- ConstantInt *CaseVal,
- BasicBlock *CaseDest,
- BasicBlock **CommonDest,
- SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) {
+static bool
+GetCaseResults(SwitchInst *SI,
+ ConstantInt *CaseVal,
+ BasicBlock *CaseDest,
+ BasicBlock **CommonDest,
+ SmallVectorImpl<std::pair<PHINode *, Constant *> > &Res,
+ const DataLayout *DL) {
// The block from which we enter the common destination.
BasicBlock *Pred = SI->getParent();
@@ -3415,7 +3388,7 @@ static bool GetCaseResults(SwitchInst *SI,
} else if (isa<DbgInfoIntrinsic>(I)) {
// Skip debug intrinsic.
continue;
- } else if (Constant *C = ConstantFold(I, ConstantPool)) {
+ } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) {
// Instruction is side-effect free and constant.
ConstantPool.insert(std::make_pair(I, C));
} else {
@@ -3469,7 +3442,7 @@ namespace {
SwitchLookupTable(Module &M,
uint64_t TableSize,
ConstantInt *Offset,
- const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
Constant *DefaultValue,
const DataLayout *TD);
@@ -3516,7 +3489,7 @@ namespace {
SwitchLookupTable::SwitchLookupTable(Module &M,
uint64_t TableSize,
ConstantInt *Offset,
- const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
Constant *DefaultValue,
const DataLayout *TD)
: SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) {
@@ -3643,7 +3616,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
}
/// ShouldBuildLookupTable - Determine whether a lookup table should be built
-/// for this switch, based on the number of caes, size of the table and the
+/// for this switch, based on the number of cases, size of the table and the
/// types of the results.
static bool ShouldBuildLookupTable(SwitchInst *SI,
uint64_t TableSize,
@@ -3739,7 +3712,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
ResultsTy Results;
if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
- Results))
+ Results, TD))
return false;
// Append the result from this case to the list for each phi.
@@ -3753,7 +3726,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
// Get the resulting values for the default case.
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
- DefaultResultsList))
+ DefaultResultsList, TD))
return false;
for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
PHINode *PHI = DefaultResultsList[I].first;
@@ -3774,14 +3747,32 @@ static bool SwitchToLookupTable(SwitchInst *SI,
CommonDest->getParent(),
CommonDest);
- // Check whether the condition value is within the case range, and branch to
- // the new BB.
+ // Compute the table index value.
Builder.SetInsertPoint(SI);
Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
"switch.tableidx");
- Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
- MinCaseVal->getType(), TableSize));
- Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+
+ // Compute the maximum table size representable by the integer type we are
+ // switching upon.
+ unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
+  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
+ assert(MaxTableSize >= TableSize &&
+ "It is impossible for a switch to have more entries than the max "
+ "representable value of its input integer type's size.");
+
+ // If we have a fully covered lookup table, unconditionally branch to the
+ // lookup table BB. Otherwise, check if the condition value is within the case
+ // range. If it is so, branch to the new BB. Otherwise branch to SI's default
+ // destination.
+ const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize;
+ if (GeneratingCoveredLookupTable) {
+ Builder.CreateBr(LookupBB);
+ SI->getDefaultDest()->removePredecessor(SI->getParent());
+ } else {
+ Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
+ MinCaseVal->getType(), TableSize));
+ Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+ }
// Populate the BB that does the lookups.
Builder.SetInsertPoint(LookupBB);
@@ -3810,9 +3801,11 @@ static bool SwitchToLookupTable(SwitchInst *SI,
Builder.CreateBr(CommonDest);
// Remove the switch.
- for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) {
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
BasicBlock *Succ = SI->getSuccessor(i);
- if (Succ == SI->getDefaultDest()) continue;
+
+ if (Succ == SI->getDefaultDest())
+ continue;
Succ->removePredecessor(SI->getParent());
}
SI->eraseFromParent();
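
The covered-table test above compares the table size against the largest value the switch condition's type can represent; when they match, the bounds check and the default edge are both dropped. A hedged standalone sketch of the arithmetic (maxTableSize is a hypothetical mirror of the inline computation):

    #include <cassert>
    #include <stdint.h>

    static uint64_t maxTableSize(unsigned CaseBits) {
      return CaseBits > 63 ? UINT64_MAX : 1ULL << CaseBits;
    }

    int main() {
      // An i2 condition has 4 possible values: a 4-entry table is fully
      // covered, so the branch to the default block is removed.
      assert(maxTableSize(2) == 4);
      assert(maxTableSize(64) == UINT64_MAX); // Shift would overflow at 64.
      return 0;
    }
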
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 41c207c..bf3442a 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -119,7 +119,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
return 0;
D = ConstantInt::get(UseInst->getContext(),
- APInt(BitWidth, 1).shl(D->getZExtValue()));
+ APInt::getOneBitSet(BitWidth, D->getZExtValue()));
}
FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 6bea2dd..15b3e66 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -17,6 +17,7 @@
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -26,11 +27,16 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;
+static cl::opt<bool>
+ColdErrorCalls("error-reporting-is-cold", cl::init(true),
+ cl::Hidden, cl::desc("Treat error-reporting calls as cold"));
+
/// This class is the abstract base class for the set of optimizations that
/// corresponds to one library call.
namespace {
@@ -118,6 +124,21 @@ static bool callHasFloatingPointArgument(const CallInst *CI) {
return false;
}
+/// \brief Check whether the overloaded unary floating point function
+/// corresponding to \a Ty is available.
+static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc::Func DoubleFn, LibFunc::Func FloatFn,
+ LibFunc::Func LongDoubleFn) {
+ switch (Ty->getTypeID()) {
+ case Type::FloatTyID:
+ return TLI->has(FloatFn);
+ case Type::DoubleTyID:
+ return TLI->has(DoubleFn);
+ default:
+ return TLI->has(LongDoubleFn);
+ }
+}
+
//===----------------------------------------------------------------------===//
// Fortified Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -477,7 +498,7 @@ struct StrChrOpt : public LibCallOptimization {
// Compute the offset, make sure to handle the case when we're searching for
// zero (a weird way to spell strlen).
- size_t I = CharC->getSExtValue() == 0 ?
+ size_t I = (0xFF & CharC->getSExtValue()) == 0 ?
Str.size() : Str.find(CharC->getSExtValue());
if (I == StringRef::npos) // Didn't find the char. strchr returns null.
return Constant::getNullValue(CI->getType());
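
The 0xFF mask matters because strchr converts its int argument to char: an argument like 0x100 has a zero low byte and must hit the strlen special case. A hedged standalone check (values are illustrative):

    #include <cassert>
    #include <cstring>

    int main() {
      const char *S = "abc";
      // Only the low byte selects the search character, so 0x100 searches
      // for '\0' and returns a pointer to the terminator.
      assert(std::strchr(S, 0x100) == S + 3);
      long SExt = 0x100;
      assert(SExt != 0 && (0xFF & SExt) == 0); // Old test vs. masked test.
      return 0;
    }
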
@@ -513,7 +534,7 @@ struct StrRChrOpt : public LibCallOptimization {
}
// Compute the offset.
- size_t I = CharC->getSExtValue() == 0 ?
+ size_t I = (0xFF & CharC->getSExtValue()) == 0 ?
Str.size() : Str.rfind(CharC->getSExtValue());
if (I == StringRef::npos) // Didn't find the char. Return null.
return Constant::getNullValue(CI->getType());
@@ -774,7 +795,7 @@ struct StrPBrkOpt : public LibCallOptimization {
// Constant folding.
if (HasS1 && HasS2) {
size_t I = S1.find_first_of(S2);
- if (I == std::string::npos) // No match.
+ if (I == StringRef::npos) // No match.
return Constant::getNullValue(CI->getType());
return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
@@ -912,7 +933,7 @@ struct StrStrOpt : public LibCallOptimization {
// If both strings are known, constant fold it.
if (HasStr1 && HasStr2) {
- std::string::size_type Offset = SearchStr.find(ToFindStr);
+ size_t Offset = SearchStr.find(ToFindStr);
if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
return Constant::getNullValue(CI->getType());
@@ -1031,7 +1052,7 @@ struct MemSetOpt : public LibCallOptimization {
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)))
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
@@ -1133,9 +1154,13 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
- if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
+ // pow(1.0, x) -> 1.0
+ if (Op1C->isExactlyValue(1.0))
return Op1C;
- if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x)
+ // pow(2.0, x) -> exp2(x)
+ if (Op1C->isExactlyValue(2.0) &&
+ hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f,
+ LibFunc::exp2l))
return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
}
@@ -1145,7 +1170,11 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
return ConstantFP::get(CI->getType(), 1.0);
- if (Op2C->isExactlyValue(0.5)) {
+ if (Op2C->isExactlyValue(0.5) &&
+ hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf,
+ LibFunc::sqrtl) &&
+ hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf,
+ LibFunc::fabsl)) {
// Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
// This is faster than calling pow, and still handles negative zero
// and negative infinity correctly.
@@ -1178,7 +1207,7 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
Value *Ret = NULL;
if (UnsafeFPShrink && Callee->getName() == "exp2" &&
- TLI->has(LibFunc::exp2)) {
+ TLI->has(LibFunc::exp2f)) {
UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
}
@@ -1229,6 +1258,155 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
}
};
+struct SinCosPiOpt : public LibCallOptimization {
+ SinCosPiOpt() {}
+
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Make sure the prototype is as expected; otherwise the rest of the
+ // function is probably invalid and likely to abort.
+ if (!isTrigLibCall(CI))
+ return 0;
+
+ Value *Arg = CI->getArgOperand(0);
+ SmallVector<CallInst *, 1> SinCalls;
+ SmallVector<CallInst *, 1> CosCalls;
+ SmallVector<CallInst *, 1> SinCosCalls;
+
+ bool IsFloat = Arg->getType()->isFloatTy();
+
+ // Look for all compatible sinpi, cospi and sincospi calls with the same
+ // argument. If there are enough (in some sense) we can make the
+ // substitution.
+ for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ++UI)
+ classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls,
+ SinCosCalls);
+
+ // It's only worthwhile if both sinpi and cospi are actually used.
+ if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
+ return 0;
+
+ Value *Sin, *Cos, *SinCos;
+ insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
+ SinCos);
+
+ replaceTrigInsts(SinCalls, Sin);
+ replaceTrigInsts(CosCalls, Cos);
+ replaceTrigInsts(SinCosCalls, SinCos);
+
+ return 0;
+ }
+
+ bool isTrigLibCall(CallInst *CI) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+
+ // We can only hope to do anything useful if we can ignore things like errno
+ // and floating-point exceptions.
+ bool AttributesSafe = CI->hasFnAttr(Attribute::NoUnwind) &&
+ CI->hasFnAttr(Attribute::ReadNone);
+
+ // Other than that we need float(float) or double(double)
+ return AttributesSafe && FT->getNumParams() == 1 &&
+ FT->getReturnType() == FT->getParamType(0) &&
+ (FT->getParamType(0)->isFloatTy() ||
+ FT->getParamType(0)->isDoubleTy());
+ }
+
+ void classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat,
+ SmallVectorImpl<CallInst *> &SinCalls,
+ SmallVectorImpl<CallInst *> &CosCalls,
+ SmallVectorImpl<CallInst *> &SinCosCalls) {
+ CallInst *CI = dyn_cast<CallInst>(Val);
+
+ if (!CI)
+ return;
+
+ Function *Callee = CI->getCalledFunction();
+ StringRef FuncName = Callee->getName();
+ LibFunc::Func Func;
+ if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) ||
+ !isTrigLibCall(CI))
+ return;
+
+ if (IsFloat) {
+ if (Func == LibFunc::sinpif)
+ SinCalls.push_back(CI);
+ else if (Func == LibFunc::cospif)
+ CosCalls.push_back(CI);
+ else if (Func == LibFunc::sincospi_stretf)
+ SinCosCalls.push_back(CI);
+ } else {
+ if (Func == LibFunc::sinpi)
+ SinCalls.push_back(CI);
+ else if (Func == LibFunc::cospi)
+ CosCalls.push_back(CI);
+ else if (Func == LibFunc::sincospi_stret)
+ SinCosCalls.push_back(CI);
+ }
+ }
+
+ void replaceTrigInsts(SmallVectorImpl<CallInst*> &Calls, Value *Res) {
+ for (SmallVectorImpl<CallInst*>::iterator I = Calls.begin(),
+ E = Calls.end();
+ I != E; ++I) {
+ LCS->replaceAllUsesWith(*I, Res);
+ }
+ }
+
+ void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
+ bool UseFloat, Value *&Sin, Value *&Cos,
+ Value *&SinCos) {
+ Type *ArgTy = Arg->getType();
+ Type *ResTy;
+ StringRef Name;
+
+ Triple T(OrigCallee->getParent()->getTargetTriple());
+ if (UseFloat) {
+ Name = "__sincospi_stretf";
+
+ assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
+ // x86_64 can't use {float, float} since that would be returned in both
+ // xmm0 and xmm1, which isn't what a real struct would do.
+ ResTy = T.getArch() == Triple::x86_64
+ ? static_cast<Type *>(VectorType::get(ArgTy, 2))
+ : static_cast<Type *>(StructType::get(ArgTy, ArgTy, NULL));
+ } else {
+ Name = "__sincospi_stret";
+ ResTy = StructType::get(ArgTy, ArgTy, NULL);
+ }
+
+ Module *M = OrigCallee->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
+ ResTy, ArgTy, NULL);
+
+ if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
+ // If the argument is an instruction, it must dominate all uses so put our
+ // sincos call there.
+ BasicBlock::iterator Loc = ArgInst;
+ B.SetInsertPoint(ArgInst->getParent(), ++Loc);
+ } else {
+ // Otherwise (e.g. for a constant) the beginning of the function is as
+ // good a place as any.
+ BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
+ B.SetInsertPoint(&EntryBB, EntryBB.begin());
+ }
+
+ SinCos = B.CreateCall(Callee, Arg, "sincospi");
+
+ if (SinCos->getType()->isStructTy()) {
+ Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
+ Cos = B.CreateExtractValue(SinCos, 1, "cospi");
+ } else {
+ Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
+ "sinpi");
+ Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
+ "cospi");
+ }
+ }
+
+};
+
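
For intuition, a minimal host-side sketch of the rewrite SinCosPiOpt performs: two
independent sinpi/cospi computations on one argument become a single combined call
whose two results are extracted. The sincospi_stret function below is an
illustrative stand-in, not the target runtime's actual __sincospi_stret entry point:

    // Illustrative only: sinpi(x) = sin(pi*x) and cospi(x) = cos(pi*x).
    #include <cmath>
    #include <cstdio>
    #include <utility>

    static std::pair<double, double> sincospi_stret(double x) {
      // Stand-in for the combined runtime call (assumption, not the real one).
      const double Pi = 3.14159265358979323846;
      return std::make_pair(std::sin(Pi * x), std::cos(Pi * x));
    }

    int main() {
      const double Pi = 3.14159265358979323846;
      double x = 0.25;
      double S = std::sin(Pi * x), C = std::cos(Pi * x); // before: two calls
      std::pair<double, double> SC = sincospi_stret(x);  // after: one call
      std::printf("%f %f vs %f %f\n", S, C, SC.first, SC.second);
      return 0;
    }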
//===----------------------------------------------------------------------===//
// Integer Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -1333,6 +1511,54 @@ struct ToAsciiOpt : public LibCallOptimization {
// Formatting and IO Library Call Optimizations
//===----------------------------------------------------------------------===//
+struct ErrorReportingOpt : public LibCallOptimization {
+ ErrorReportingOpt(int S = -1) : StreamArg(S) {}
+
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &) {
+    // Error reporting calls should be cold; mark them as such.
+ // This applies even to non-builtin calls: it is only a hint and applies to
+ // functions that the frontend might not understand as builtins.
+
+ // This heuristic was suggested in:
+ // Improving Static Branch Prediction in a Compiler
+ // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
+ // Proceedings of PACT'98, Oct. 1998, IEEE
+
+ if (!CI->hasFnAttr(Attribute::Cold) && isReportingError(Callee, CI)) {
+ CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold);
+ }
+
+ return 0;
+ }
+
+protected:
+ bool isReportingError(Function *Callee, CallInst *CI) {
+ if (!ColdErrorCalls)
+ return false;
+
+ if (!Callee || !Callee->isDeclaration())
+ return false;
+
+ if (StreamArg < 0)
+ return true;
+
+ // These functions might be considered cold, but only if their stream
+ // argument is stderr.
+
+ if (StreamArg >= (int) CI->getNumArgOperands())
+ return false;
+ LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
+ if (!LI)
+ return false;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
+ if (!GV || !GV->isDeclaration())
+ return false;
+ return GV->getName() == "stderr";
+ }
+
+ int StreamArg;
+};
+
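To make the heuristic concrete, here is a small program annotated with what
ErrorReportingOpt would do to each call, assuming the libc functions are only
declarations in the module (a sketch of the observable effect, not of the pass
itself):

    #include <stdio.h>

    int main(void) {
      perror("open");           /* error reporting: always marked cold       */
      fprintf(stderr, "bad\n"); /* stream argument (index 0) is stderr: cold */
      fprintf(stdout, "ok\n");  /* stream is not stderr: left alone          */
      fputc('!', stderr);       /* stream argument (index 1) is stderr: cold */
      return 1;
    }
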
struct PrintFOpt : public LibCallOptimization {
Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
IRBuilder<> &B) {
@@ -1361,7 +1587,7 @@ struct PrintFOpt : public LibCallOptimization {
// printf("foo\n") --> puts("foo")
if (FormatStr[FormatStr.size()-1] == '\n' &&
- FormatStr.find('%') == std::string::npos) { // no format characters.
+ FormatStr.find('%') == StringRef::npos) { // No format characters.
// Create a string literal with no \n on it. We expect the constant merge
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
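
The transform above only fires when the constant format string ends in '\n' and
contains no '%'; for instance (illustrative):

    #include <stdio.h>

    int main(void) {
      const char *Name = "world";
      printf("hello\n");    /* -> puts("hello"): trailing \n, no '%'    */
      printf("%s\n", Name); /* contains '%': left to the other rewrites */
      printf("no newline"); /* no trailing \n: not converted to puts    */
      return 0;
    }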
@@ -1513,6 +1739,9 @@ struct SPrintFOpt : public LibCallOptimization {
struct FPrintFOpt : public LibCallOptimization {
Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
IRBuilder<> &B) {
+ ErrorReportingOpt ER(/* StreamArg = */ 0);
+ (void) ER.callOptimizer(Callee, CI, B);
+
// All the optimizations depend on the format string.
StringRef FormatStr;
if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
@@ -1590,6 +1819,9 @@ struct FPrintFOpt : public LibCallOptimization {
struct FWriteOpt : public LibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ ErrorReportingOpt ER(/* StreamArg = */ 3);
+ (void) ER.callOptimizer(Callee, CI, B);
+
// Require a pointer, an integer, an integer, a pointer, returning integer.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
@@ -1623,6 +1855,9 @@ struct FWriteOpt : public LibCallOptimization {
struct FPutsOpt : public LibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ ErrorReportingOpt ER(/* StreamArg = */ 1);
+ (void) ER.callOptimizer(Callee, CI, B);
+
// These optimizations require DataLayout.
if (!TD) return 0;
@@ -1741,6 +1976,7 @@ static MemSetOpt MemSet;
// Math library call optimizations.
static UnaryDoubleFPOpt UnaryDoubleFP(false);
static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+static SinCosPiOpt SinCosPi;
// Integer library call optimizations.
static FFSOpt FFS;
@@ -1750,6 +1986,9 @@ static IsAsciiOpt IsAscii;
static ToAsciiOpt ToAscii;
// Formatting and IO library call optimizations.
+static ErrorReportingOpt ErrorReporting;
+static ErrorReportingOpt ErrorReporting0(0);
+static ErrorReportingOpt ErrorReporting1(1);
static PrintFOpt PrintF;
static SPrintFOpt SPrintF;
static FPrintFOpt FPrintF;
@@ -1825,6 +2064,11 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
case LibFunc::cos:
case LibFunc::cosl:
return &Cos;
+ case LibFunc::sinpif:
+ case LibFunc::sinpi:
+ case LibFunc::cospif:
+ case LibFunc::cospi:
+ return &SinCosPi;
case LibFunc::powf:
case LibFunc::pow:
case LibFunc::powl:
@@ -1859,6 +2103,13 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
return &FPuts;
case LibFunc::puts:
return &Puts;
+ case LibFunc::perror:
+ return &ErrorReporting;
+ case LibFunc::vfprintf:
+ case LibFunc::fiprintf:
+ return &ErrorReporting0;
+ case LibFunc::fputc:
+ return &ErrorReporting1;
case LibFunc::ceil:
case LibFunc::fabs:
case LibFunc::floor:
@@ -1940,7 +2191,7 @@ LibCallSimplifier::~LibCallSimplifier() {
}
Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
- if (CI->hasFnAttr(Attribute::NoBuiltin)) return 0;
+ if (CI->isNoBuiltin()) return 0;
return Impl->optimizeCall(CI);
}
@@ -1950,3 +2201,53 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
}
}
+
+// TODO:
+// Additional cases that we need to add to this file:
+//
+// cbrt:
+// * cbrt(expN(X)) -> expN(x/3)
+// * cbrt(sqrt(x)) -> pow(x,1/6)
+// * cbrt(cbrt(x)) -> pow(x,1/9)
+//
+// exp, expf, expl:
+// * exp(log(x)) -> x
+//
+// log, logf, logl:
+// * log(exp(x)) -> x
+// * log(x**y) -> y*log(x)
+// * log(exp(y)) -> y*log(e)
+// * log(exp2(y)) -> y*log(2)
+// * log(exp10(y)) -> y*log(10)
+// * log(sqrt(x)) -> 0.5*log(x)
+// * log(pow(x,y)) -> y*log(x)
+//
+// lround, lroundf, lroundl:
+// * lround(cnst) -> cnst'
+//
+// pow, powf, powl:
+// * pow(exp(x),y) -> exp(x*y)
+// * pow(sqrt(x),y) -> pow(x,y*0.5)
+// * pow(pow(x,y),z)-> pow(x,y*z)
+//
+// round, roundf, roundl:
+// * round(cnst) -> cnst'
+//
+// signbit:
+// * signbit(cnst) -> cnst'
+// * signbit(nncst) -> 0 (if nncst is a non-negative constant)
+//
+// sqrt, sqrtf, sqrtl:
+// * sqrt(expN(x)) -> expN(x*0.5)
+// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
+// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
+//
+// strchr:
+// * strchr(p, 0) -> strlen(p)
+//
+// tan, tanf, tanl:
+// * tan(atan(x)) -> x
+//
+// trunc, truncf, truncl:
+// * trunc(cnst) -> cnst'
+//
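
Several of the identities above hold exactly only in real arithmetic, which is
why such rewrites typically need fast-math style relaxations. A quick numeric
check of two of them for a positive input:

    #include <cmath>
    #include <cstdio>

    int main() {
      double X = 7.5, Y = 3.25;
      // pow(sqrt(x),y) -> pow(x,y*0.5)
      std::printf("%.17g %.17g\n",
                  std::pow(std::sqrt(X), Y), std::pow(X, Y * 0.5));
      // log(sqrt(x)) -> 0.5*log(x)
      std::printf("%.17g %.17g\n",
                  std::log(std::sqrt(X)), 0.5 * std::log(X));
      return 0;
    }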
diff --git a/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp b/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp
new file mode 100644
index 0000000..2ef692c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp
@@ -0,0 +1,222 @@
+//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions or global
+// variables, or to instrument some functions or global variables in a specific
+// way, based on a user-supplied list.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SpecialCaseList.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <string>
+#include <utility>
+
+namespace llvm {
+
+/// Represents a set of regular expressions. Regular expressions which are
+/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all
+/// others are represented as a single pipe-separated regex in RegEx. The
+/// reason for doing so is efficiency; StringSet is much faster at matching
+/// literal strings than Regex.
+struct SpecialCaseList::Entry {
+ StringSet<> Strings;
+ Regex *RegEx;
+
+ Entry() : RegEx(0) {}
+
+ bool match(StringRef Query) const {
+ return Strings.count(Query) || (RegEx && RegEx->match(Query));
+ }
+};
+
+SpecialCaseList::SpecialCaseList() : Entries() {}
+
+SpecialCaseList *SpecialCaseList::create(
+ const StringRef Path, std::string &Error) {
+ if (Path.empty())
+ return new SpecialCaseList();
+ OwningPtr<MemoryBuffer> File;
+ if (error_code EC = MemoryBuffer::getFile(Path, File)) {
+ Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str();
+ return 0;
+ }
+ return create(File.get(), Error);
+}
+
+SpecialCaseList *SpecialCaseList::create(
+ const MemoryBuffer *MB, std::string &Error) {
+ OwningPtr<SpecialCaseList> SCL(new SpecialCaseList());
+ if (!SCL->parse(MB, Error))
+ return 0;
+ return SCL.take();
+}
+
+SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) {
+ std::string Error;
+ if (SpecialCaseList *SCL = create(Path, Error))
+ return SCL;
+ report_fatal_error(Error);
+}
+
+bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
+ // Iterate through each line in the blacklist file.
+ SmallVector<StringRef, 16> Lines;
+ SplitString(MB->getBuffer(), Lines, "\n\r");
+ StringMap<StringMap<std::string> > Regexps;
+ assert(Entries.empty() &&
+ "parse() should be called on an empty SpecialCaseList");
+ int LineNo = 1;
+ for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end();
+ I != E; ++I, ++LineNo) {
+ // Ignore empty lines and lines starting with "#"
+ if (I->empty() || I->startswith("#"))
+ continue;
+ // Get our prefix and unparsed regexp.
+ std::pair<StringRef, StringRef> SplitLine = I->split(":");
+ StringRef Prefix = SplitLine.first;
+ if (SplitLine.second.empty()) {
+ // Missing ':' in the line.
+ Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" +
+ SplitLine.first + "'").str();
+ return false;
+ }
+
+ std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("=");
+ std::string Regexp = SplitRegexp.first;
+ StringRef Category = SplitRegexp.second;
+
+ // Backwards compatibility.
+ if (Prefix == "global-init") {
+ Prefix = "global";
+ Category = "init";
+ } else if (Prefix == "global-init-type") {
+ Prefix = "type";
+ Category = "init";
+ } else if (Prefix == "global-init-src") {
+ Prefix = "src";
+ Category = "init";
+ }
+
+ // See if we can store Regexp in Strings.
+ if (Regex::isLiteralERE(Regexp)) {
+ Entries[Prefix][Category].Strings.insert(Regexp);
+ continue;
+ }
+
+ // Replace * with .*
+ for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
+ pos += strlen(".*")) {
+ Regexp.replace(pos, strlen("*"), ".*");
+ }
+
+ // Check that the regexp is valid.
+ Regex CheckRE(Regexp);
+ std::string REError;
+ if (!CheckRE.isValid(REError)) {
+ Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" +
+ SplitLine.second + "': " + REError).str();
+ return false;
+ }
+
+ // Add this regexp into the proper group by its prefix.
+ if (!Regexps[Prefix][Category].empty())
+ Regexps[Prefix][Category] += "|";
+ Regexps[Prefix][Category] += "^" + Regexp + "$";
+ }
+
+  // Iterate through each of the prefixes, and create a Regex for each one.
+ for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(),
+ E = Regexps.end();
+ I != E; ++I) {
+ for (StringMap<std::string>::const_iterator II = I->second.begin(),
+ IE = I->second.end();
+ II != IE; ++II) {
+ Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue());
+ }
+ }
+ return true;
+}
+
+SpecialCaseList::~SpecialCaseList() {
+ for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(),
+ E = Entries.end();
+ I != E; ++I) {
+ for (StringMap<Entry>::const_iterator II = I->second.begin(),
+ IE = I->second.end();
+ II != IE; ++II) {
+ delete II->second.RegEx;
+ }
+ }
+}
+
+bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const {
+ return isIn(*F.getParent(), Category) ||
+ inSectionCategory("fun", F.getName(), Category);
+}
+
+static StringRef GetGlobalTypeString(const GlobalValue &G) {
+ // Types of GlobalVariables are always pointer types.
+ Type *GType = G.getType()->getElementType();
+ // For now we support blacklisting struct types only.
+ if (StructType *SGType = dyn_cast<StructType>(GType)) {
+ if (!SGType->isLiteral())
+ return SGType->getName();
+ }
+ return "<unknown type>";
+}
+
+bool SpecialCaseList::isIn(const GlobalVariable &G,
+ const StringRef Category) const {
+ return isIn(*G.getParent(), Category) ||
+ inSectionCategory("global", G.getName(), Category) ||
+ inSectionCategory("type", GetGlobalTypeString(G), Category);
+}
+
+bool SpecialCaseList::isIn(const GlobalAlias &GA,
+ const StringRef Category) const {
+ if (isIn(*GA.getParent(), Category))
+ return true;
+
+ if (isa<FunctionType>(GA.getType()->getElementType()))
+ return inSectionCategory("fun", GA.getName(), Category);
+
+ return inSectionCategory("global", GA.getName(), Category) ||
+ inSectionCategory("type", GetGlobalTypeString(GA), Category);
+}
+
+bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const {
+ return inSectionCategory("src", M.getModuleIdentifier(), Category);
+}
+
+bool SpecialCaseList::inSectionCategory(const StringRef Section,
+ const StringRef Query,
+ const StringRef Category) const {
+ StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
+ if (I == Entries.end()) return false;
+ StringMap<Entry>::const_iterator II = I->second.find(Category);
+ if (II == I->second.end()) return false;
+
+ return II->getValue().match(Query);
+}
+
+} // namespace llvm
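
As parse() implies, each line of a list is prefix:pattern or
prefix:pattern=category, '#' starts a comment, '*' is rewritten to '.*', and
anything else must be a valid extended regex. A plausible list (contents
illustrative):

    # Don't instrument anything built from libc sources.
    src:/usr/src/lib/libc/*
    # Skip one function, and the whole my_alloc_ family.
    fun:trusted_memcpy
    fun:my_alloc_*
    # Legacy spelling; parse() rewrites this to global:bad_global=init.
    global-init:bad_global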
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 544c5ee..457fc80 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -22,14 +22,22 @@ using namespace llvm;
// Out of line method to get vtable etc for class.
void ValueMapTypeRemapper::anchor() {}
+void ValueMaterializer::anchor() {}
Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper) {
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
ValueToValueMapTy::iterator I = VM.find(V);
// If the value already exists in the map, use it.
if (I != VM.end() && I->second) return I->second;
+ // If we have a materializer and it can materialize a value, use that.
+ if (Materializer) {
+ if (Value *NewV = Materializer->materializeValueFor(const_cast<Value*>(V)))
+ return VM[V] = NewV;
+ }
+
// Global values do not need to be seeded into the VM if they
// are using the identity mapping.
if (isa<GlobalValue>(V) || isa<MDString>(V))
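
The new hook lets MapValue conjure mappings on demand instead of failing or
falling back to the identity map. A hedged sketch of a client, assuming the
LLVM headers of this era; the subclass name and policy are illustrative, only
the materializeValueFor hook comes from this patch:

    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"
    using namespace llvm;

    struct DeclMaterializer : public ValueMaterializer {
      Module &Dst;
      DeclMaterializer(Module &M) : Dst(M) {}
      virtual Value *materializeValueFor(Value *V) {
        // Lazily create a matching declaration in the destination module,
        // or return null to fall back to the normal mapping logic.
        if (Function *F = dyn_cast<Function>(V))
          return Dst.getOrInsertFunction(F->getName(), F->getFunctionType());
        return 0;
      }
    };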
@@ -64,7 +72,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
Value *OP = MD->getOperand(i);
if (OP == 0) continue;
- Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper);
+ Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer);
// Use identity map if Mapped_Op is null and we can ignore missing
// entries.
if (Mapped_OP == OP ||
@@ -79,7 +87,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
if (Op == 0)
Elts.push_back(0);
else {
- Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper);
+ Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer);
// Use identity map if Mapped_Op is null and we can ignore missing
// entries.
if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries))
@@ -109,9 +117,9 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
Function *F =
- cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper));
+ cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper, Materializer));
BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
- Flags, TypeMapper));
+ Flags, TypeMapper, Materializer));
return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
}
@@ -121,7 +129,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
Value *Mapped = 0;
for (; OpNo != NumOperands; ++OpNo) {
Value *Op = C->getOperand(OpNo);
- Mapped = MapValue(Op, VM, Flags, TypeMapper);
+ Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer);
if (Mapped != C) break;
}
@@ -149,7 +157,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// Map the rest of the operands that aren't processed yet.
for (++OpNo; OpNo != NumOperands; ++OpNo)
Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
- Flags, TypeMapper));
+ Flags, TypeMapper, Materializer));
}
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
@@ -173,10 +181,11 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
/// current values into those specified by VMap.
///
void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
- RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){
+ RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer){
// Remap operands.
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, VMap, Flags, TypeMapper);
+ Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer);
// If we aren't ignoring missing entries, assert that something happened.
if (V != 0)
*op = V;
@@ -204,7 +213,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
MDNode *Old = MI->second;
- MDNode *New = MapValue(Old, VMap, Flags, TypeMapper);
+ MDNode *New = MapValue(Old, VMap, Flags, TypeMapper, Materializer);
if (New != Old)
I->setMetadata(MI->first, New);
}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index 17900da..c5e1dcb 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -356,7 +356,7 @@ namespace {
Instruction *J, unsigned o, bool IBeforeJ);
void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
- Instruction *J, SmallVector<Value *, 3> &ReplacedOperands,
+ Instruction *J, SmallVectorImpl<Value *> &ReplacedOperands,
bool IBeforeJ);
void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
@@ -533,7 +533,7 @@ namespace {
default: break;
case Instruction::GetElementPtr:
// We mark this instruction as zero-cost because scalar GEPs are usually
- // lowered to the intruction addressing mode. At the moment we don't
+ // lowered to the instruction addressing mode. At the moment we don't
// generate vector GEPs.
return 0;
case Instruction::Br:
@@ -625,10 +625,10 @@ namespace {
ConstantInt *IntOff = ConstOffSCEV->getValue();
int64_t Offset = IntOff->getSExtValue();
- Type *VTy = cast<PointerType>(IPtr->getType())->getElementType();
+ Type *VTy = IPtr->getType()->getPointerElementType();
int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy);
- Type *VTy2 = cast<PointerType>(JPtr->getType())->getElementType();
+ Type *VTy2 = JPtr->getType()->getPointerElementType();
if (VTy != VTy2 && Offset < 0) {
int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2);
OffsetInElmts = Offset/VTy2TSS;
@@ -1182,6 +1182,8 @@ namespace {
// Look for an instruction with which to pair instruction *I...
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
bool JAfterStart = IAfterStart;
BasicBlock::iterator J = llvm::next(I);
for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
@@ -1403,6 +1405,8 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) {
(void) trackUsesOfI(Users, WriteSet, I, J);
@@ -1602,7 +1606,7 @@ namespace {
DenseSet<ValuePair> CurrentPairs;
bool CanAdd = true;
- for (SmallVector<ValuePairWithDepth, 8>::iterator C2
+ for (SmallVectorImpl<ValuePairWithDepth>::iterator C2
= BestChildren.begin(), E2 = BestChildren.end();
C2 != E2; ++C2) {
if (C2->first.first == C->first.first ||
@@ -1642,7 +1646,7 @@ namespace {
if (!CanAdd) continue;
// And check the queue too...
- for (SmallVector<ValuePairWithDepth, 32>::iterator C2 = Q.begin(),
+ for (SmallVectorImpl<ValuePairWithDepth>::iterator C2 = Q.begin(),
E2 = Q.end(); C2 != E2; ++C2) {
if (C2->first.first == C->first.first ||
C2->first.first == C->first.second ||
@@ -1691,7 +1695,7 @@ namespace {
// to an already-selected child. Check for this here, and if a
// conflict is found, then remove the previously-selected child
// before adding this one in its place.
- for (SmallVector<ValuePairWithDepth, 8>::iterator C2
+ for (SmallVectorImpl<ValuePairWithDepth>::iterator C2
= BestChildren.begin(); C2 != BestChildren.end();) {
if (C2->first.first == C->first.first ||
C2->first.first == C->first.second ||
@@ -1706,7 +1710,7 @@ namespace {
BestChildren.push_back(ValuePairWithDepth(C->first, C->second));
}
- for (SmallVector<ValuePairWithDepth, 8>::iterator C
+ for (SmallVectorImpl<ValuePairWithDepth>::iterator C
= BestChildren.begin(), E2 = BestChildren.end();
C != E2; ++C) {
size_t DepthF = getDepthFactor(C->first.first);
@@ -2227,11 +2231,12 @@ namespace {
// The pointer value is taken to be the one with the lowest offset.
Value *VPtr = IPtr;
- Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType();
- Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType();
+ Type *ArgTypeI = IPtr->getType()->getPointerElementType();
+ Type *ArgTypeJ = JPtr->getType()->getPointerElementType();
Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
- Type *VArgPtrType = PointerType::get(VArgType,
- cast<PointerType>(IPtr->getType())->getAddressSpace());
+ Type *VArgPtrType
+ = PointerType::get(VArgType,
+ IPtr->getType()->getPointerAddressSpace());
return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o),
/* insert before */ I);
}
@@ -2240,7 +2245,7 @@ namespace {
unsigned MaskOffset, unsigned NumInElem,
unsigned NumInElem1, unsigned IdxOffset,
std::vector<Constant*> &Mask) {
- unsigned NumElem1 = cast<VectorType>(J->getType())->getNumElements();
+ unsigned NumElem1 = J->getType()->getVectorNumElements();
for (unsigned v = 0; v < NumElem1; ++v) {
int m = cast<ShuffleVectorInst>(J)->getMaskValue(v);
if (m < 0) {
@@ -2267,18 +2272,18 @@ namespace {
Type *ArgTypeJ = J->getType();
Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
- unsigned NumElemI = cast<VectorType>(ArgTypeI)->getNumElements();
+ unsigned NumElemI = ArgTypeI->getVectorNumElements();
// Get the total number of elements in the fused vector type.
// By definition, this must equal the number of elements in
// the final mask.
- unsigned NumElem = cast<VectorType>(VArgType)->getNumElements();
+ unsigned NumElem = VArgType->getVectorNumElements();
std::vector<Constant*> Mask(NumElem);
Type *OpTypeI = I->getOperand(0)->getType();
- unsigned NumInElemI = cast<VectorType>(OpTypeI)->getNumElements();
+ unsigned NumInElemI = OpTypeI->getVectorNumElements();
Type *OpTypeJ = J->getOperand(0)->getType();
- unsigned NumInElemJ = cast<VectorType>(OpTypeJ)->getNumElements();
+ unsigned NumInElemJ = OpTypeJ->getVectorNumElements();
// The fused vector will be:
// -----------------------------------------------------
@@ -2340,6 +2345,12 @@ namespace {
return ExpandedIEChain;
}
+ static unsigned getNumScalarElements(Type *Ty) {
+ if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
+ return VecTy->getNumElements();
+ return 1;
+ }
+
// Returns the value to be used as the specified operand of the vector
// instruction that fuses I with J.
Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I,
@@ -2355,17 +2366,8 @@ namespace {
Instruction *L = I, *H = J;
Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ;
- unsigned numElemL;
- if (ArgTypeL->isVectorTy())
- numElemL = cast<VectorType>(ArgTypeL)->getNumElements();
- else
- numElemL = 1;
-
- unsigned numElemH;
- if (ArgTypeH->isVectorTy())
- numElemH = cast<VectorType>(ArgTypeH)->getNumElements();
- else
- numElemH = 1;
+ unsigned numElemL = getNumScalarElements(ArgTypeL);
+ unsigned numElemH = getNumScalarElements(ArgTypeH);
Value *LOp = L->getOperand(o);
Value *HOp = H->getOperand(o);
@@ -2426,11 +2428,12 @@ namespace {
if (CanUseInputs) {
unsigned LOpElem =
- cast<VectorType>(cast<Instruction>(LOp)->getOperand(0)->getType())
- ->getNumElements();
+ cast<Instruction>(LOp)->getOperand(0)->getType()
+ ->getVectorNumElements();
+
unsigned HOpElem =
- cast<VectorType>(cast<Instruction>(HOp)->getOperand(0)->getType())
- ->getNumElements();
+ cast<Instruction>(HOp)->getOperand(0)->getType()
+ ->getVectorNumElements();
// We have one or two input vectors. We need to map each index of the
// operands to the index of the original vector.
@@ -2646,14 +2649,14 @@ namespace {
getReplacementName(IBeforeJ ? I : J,
true, o, 1));
}
-
+
NHOp->insertBefore(IBeforeJ ? J : I);
HOp = NHOp;
}
}
if (ArgType->isVectorTy()) {
- unsigned numElem = cast<VectorType>(VArgType)->getNumElements();
+ unsigned numElem = VArgType->getVectorNumElements();
std::vector<Constant*> Mask(numElem);
for (unsigned v = 0; v < numElem; ++v) {
unsigned Idx = v;
@@ -2687,7 +2690,7 @@ namespace {
// to the vector instruction that fuses I with J.
void BBVectorize::getReplacementInputsForPair(LLVMContext& Context,
Instruction *I, Instruction *J,
- SmallVector<Value *, 3> &ReplacedOperands,
+ SmallVectorImpl<Value *> &ReplacedOperands,
bool IBeforeJ) {
unsigned NumOperands = I->getNumOperands();
@@ -2746,16 +2749,8 @@ namespace {
VectorType *VType = getVecTypeForPair(IType, JType);
unsigned numElem = VType->getNumElements();
- unsigned numElemI, numElemJ;
- if (IType->isVectorTy())
- numElemI = cast<VectorType>(IType)->getNumElements();
- else
- numElemI = 1;
-
- if (JType->isVectorTy())
- numElemJ = cast<VectorType>(JType)->getNumElements();
- else
- numElemJ = 1;
+ unsigned numElemI = getNumScalarElements(IType);
+ unsigned numElemJ = getNumScalarElements(JType);
if (IType->isVectorTy()) {
std::vector<Constant*> Mask1(numElemI), Mask2(numElemI);
@@ -2804,6 +2799,8 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
for (; cast<Instruction>(L) != J; ++L)
(void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs);
@@ -2824,6 +2821,8 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
for (; cast<Instruction>(L) != J;) {
if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) {
// Move this instruction
@@ -2853,6 +2852,7 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
// Note: We cannot end the loop when we reach J because J could be moved
// farther down the use chain by another instruction pairing. Also, J
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 08d3725..5e75871 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -47,13 +47,15 @@
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -119,11 +121,14 @@ static const unsigned TinyTripCountUnrollThreshold = 128;
/// than this number of comparisons.
static const unsigned RuntimeMemoryCheckThreshold = 8;
-/// We use a metadata with this name to indicate that a scalar loop was
-/// vectorized and that we don't need to re-vectorize it if we run into it
-/// again.
-static const char*
-AlreadyVectorizedMDName = "llvm.vectorizer.already_vectorized";
+/// Maximum simd width.
+static const unsigned MaxVectorWidth = 64;
+
+/// Maximum vectorization unroll count.
+static const unsigned MaxUnrollFactor = 16;
+
+/// The cost of a loop that is considered 'small' by the unroller.
+static const unsigned SmallLoopCost = 20;
namespace {
@@ -166,7 +171,9 @@ public:
updateAnalysis();
}
-private:
+ virtual ~InnerLoopVectorizer() {}
+
+protected:
/// A small list of PHINodes.
typedef SmallVector<PHINode*, 4> PhiVector;
/// When we unroll loops we have multiple vector values for each scalar.
@@ -174,6 +181,11 @@ private:
/// originated from one scalar instruction.
typedef SmallVector<Value*, 2> VectorParts;
+  // When we if-convert we need to create edge masks. We have to cache values so
+ // that we don't end up with exponential recursion/IR.
+ typedef DenseMap<std::pair<BasicBlock*, BasicBlock*>,
+ VectorParts> EdgeMaskCache;
+
/// Add code that checks at runtime if the accessed arrays overlap.
/// Returns the comparator value or NULL if no check is needed.
Instruction *addRuntimeCheck(LoopVectorizationLegality *Legal,
@@ -181,7 +193,13 @@ private:
/// Create an empty loop, based on the loop ranges of the old loop.
void createEmptyLoop(LoopVectorizationLegality *Legal);
/// Copy and widen the instructions from the old loop.
- void vectorizeLoop(LoopVectorizationLegality *Legal);
+ virtual void vectorizeLoop(LoopVectorizationLegality *Legal);
+
+  /// \brief The loop exit block may have single-value PHI nodes where the
+  /// incoming value is 'Undef'. While vectorizing we only handled real values
+ /// that were defined inside the loop. Here we fix the 'undef case'.
+ /// See PR14725.
+ void fixLCSSAPHIs();
/// A helper function that computes the predicate of the block BB, assuming
/// that the header block of the loop is set to True. It returns the *entry*
@@ -195,16 +213,23 @@ private:
void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB,
PhiVector *PV);
+ /// Vectorize a single PHINode in a block. This method handles the induction
+ /// variable canonicalization. It supports both VF = 1 for unrolled loops and
+ /// arbitrary length vectors.
+ void widenPHIInstruction(Instruction *PN, VectorParts &Entry,
+ LoopVectorizationLegality *Legal,
+ unsigned UF, unsigned VF, PhiVector *PV);
+
/// Insert the new loop to the loop hierarchy and pass manager
/// and update the analysis passes.
void updateAnalysis();
/// This instruction is un-vectorizable. Implement it as a sequence
/// of scalars.
- void scalarizeInstruction(Instruction *Instr);
+ virtual void scalarizeInstruction(Instruction *Instr);
  /// Vectorize Load and Store instructions.
- void vectorizeMemoryInstruction(Instruction *Instr,
+ virtual void vectorizeMemoryInstruction(Instruction *Instr,
LoopVectorizationLegality *Legal);
/// Create a broadcast instruction. This method generates a broadcast
@@ -212,12 +237,12 @@ private:
/// value. If this is the induction variable then we extend it to N, N+1, ...
/// this is needed because each iteration in the loop corresponds to a SIMD
/// element.
- Value *getBroadcastInstrs(Value *V);
+ virtual Value *getBroadcastInstrs(Value *V);
/// This function adds 0, 1, 2 ... to each vector element, starting at zero.
/// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...).
/// The sequence starts at StartIndex.
- Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
+ virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
/// When we go over instructions in the basic block we rely on previous
/// values within the current basic block or on loop invariant values.
@@ -227,7 +252,7 @@ private:
VectorParts &getVectorValue(Value *V);
/// Generate a shuffle sequence that will reverse the vector Vec.
- Value *reverseVector(Value *Vec);
+ virtual Value *reverseVector(Value *Vec);
/// This is a helper class that holds the vectorizer state. It maps scalar
/// instructions to vector instructions. When the code is 'unrolled' then
@@ -285,6 +310,8 @@ private:
/// The vectorization SIMD factor to use. Each vector will have this many
/// vector elements.
unsigned VF;
+
+protected:
/// The vectorization unroll factor to use. Each scalar is vectorized to this
/// many different vector instructions.
unsigned UF;
@@ -313,10 +340,57 @@ private:
PHINode *Induction;
/// The induction variable of the old basic block.
PHINode *OldInduction;
+ /// Holds the extended (to the widest induction type) start index.
+ Value *ExtendedIdx;
/// Maps scalars to widened vectors.
ValueMap WidenMap;
+ EdgeMaskCache MaskCache;
};
+class InnerLoopUnroller : public InnerLoopVectorizer {
+public:
+ InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
+ DominatorTree *DT, DataLayout *DL,
+ const TargetLibraryInfo *TLI, unsigned UnrollFactor) :
+ InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
+
+private:
+ virtual void scalarizeInstruction(Instruction *Instr);
+ virtual void vectorizeMemoryInstruction(Instruction *Instr,
+ LoopVectorizationLegality *Legal);
+ virtual Value *getBroadcastInstrs(Value *V);
+ virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
+ virtual Value *reverseVector(Value *Vec);
+};
+
+/// \brief Look for a meaningful debug location on the instruction or its
+/// operands.
+static Instruction *getDebugLocFromInstOrOperands(Instruction *I) {
+ if (!I)
+ return I;
+
+ DebugLoc Empty;
+ if (I->getDebugLoc() != Empty)
+ return I;
+
+ for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) {
+ if (Instruction *OpInst = dyn_cast<Instruction>(*OI))
+ if (OpInst->getDebugLoc() != Empty)
+ return OpInst;
+ }
+
+ return I;
+}
+
+/// \brief Set the debug location in the builder using the debug location in the
+/// instruction.
+static void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) {
+ if (const Instruction *Inst = dyn_cast_or_null<Instruction>(Ptr))
+ B.SetCurrentDebugLocation(Inst->getDebugLoc());
+ else
+ B.SetCurrentDebugLocation(DebugLoc());
+}
+
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
/// to what vectorization factor.
/// This class does not look at the profitability of vectorization, only the
@@ -333,10 +407,10 @@ private:
class LoopVectorizationLegality {
public:
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL,
- DominatorTree *DT, TargetTransformInfo* TTI,
- AliasAnalysis *AA, TargetLibraryInfo *TLI)
- : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
- Induction(0), HasFunNoNaNAttr(false) {}
+ DominatorTree *DT, TargetLibraryInfo *TLI)
+ : TheLoop(L), SE(SE), DL(DL), DT(DT), TLI(TLI),
+ Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
+ MaxSafeDepDistBytes(-1U) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -372,7 +446,7 @@ public:
MRK_FloatMax
};
- /// This POD struct holds information about reduction variables.
+ /// This struct holds information about reduction variables.
struct ReductionDescriptor {
ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {}
@@ -409,8 +483,8 @@ public:
MinMaxReductionKind MinMaxKind;
};
- // This POD struct holds information about the memory runtime legality
- // check that a group of pointers do not overlap.
+ /// This struct holds information about the memory runtime legality
+ /// check that a group of pointers do not overlap.
struct RuntimePointerCheck {
RuntimePointerCheck() : Need(false) {}
@@ -420,10 +494,13 @@ public:
Pointers.clear();
Starts.clear();
Ends.clear();
+ IsWritePtr.clear();
+ DependencySetId.clear();
}
/// Insert a pointer and calculate the start and end SCEVs.
- void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr);
+ void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr,
+ unsigned DepSetId);
/// This flag indicates if we need to add the runtime check.
bool Need;
@@ -435,9 +512,12 @@ public:
SmallVector<const SCEV*, 2> Ends;
/// Holds the information if this pointer is used for writing to memory.
SmallVector<bool, 2> IsWritePtr;
+ /// Holds the id of the set of pointers that could be dependent because of a
+ /// shared underlying object.
+ SmallVector<unsigned, 2> DependencySetId;
};
- /// A POD for saving information about induction variables.
+ /// A struct for saving information about induction variables.
struct InductionInfo {
InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
@@ -455,11 +535,6 @@ public:
/// induction descriptor.
typedef MapVector<PHINode*, InductionInfo> InductionList;
- /// Alias(Multi)Map stores the values (GEPs or underlying objects and their
- /// respective Store/Load instruction(s) to calculate aliasing.
- typedef MapVector<Value*, Instruction* > AliasMap;
- typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap;
-
/// Returns true if it is legal to vectorize this loop.
/// This does not mean that it is profitable to vectorize this
/// loop, only that it is legal to do so.
@@ -474,6 +549,9 @@ public:
/// Returns the induction variables found in the loop.
InductionList *getInductionVars() { return &Inductions; }
+ /// Returns the widest induction type.
+ Type *getWidestInductionType() { return WidestIndTy; }
+
/// Returns True if V is an induction variable in this loop.
bool isInductionVariable(const Value *V);
@@ -503,6 +581,9 @@ public:
/// This function returns the identity element (or neutral element) for
/// the operation K.
static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
+
+ unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
+
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@@ -523,8 +604,9 @@ private:
void collectLoopUniforms();
/// Return true if all of the instructions in the block can be speculatively
- /// executed.
- bool blockCanBePredicated(BasicBlock *BB);
+  /// executed. \p SafePtrs is a list of addresses that are known to be legal
+  /// to read from, i.e. dereferencing them cannot fault.
+ bool blockCanBePredicated(BasicBlock *BB, SmallPtrSet<Value *, 8>& SafePtrs);
/// Returns True, if 'Phi' is the kind of reduction variable for type
/// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
@@ -543,16 +625,6 @@ private:
/// Returns the induction kind of Phi. This function may return NoInduction
/// if the PHI is not an induction variable.
InductionKind isInductionVariable(PHINode *Phi);
- /// Return true if can compute the address bounds of Ptr within the loop.
- bool hasComputableBounds(Value *Ptr);
- /// Return true if there is the chance of write reorder.
- bool hasPossibleGlobalWriteReorder(Value *Object,
- Instruction *Inst,
- AliasMultiMap &WriteObjects,
- unsigned MaxByteWidth);
- /// Return the AA location for a load or a store.
- AliasAnalysis::Location getLoadStoreLocation(Instruction *Inst);
-
/// The loop that we evaluate.
Loop *TheLoop;
@@ -562,10 +634,6 @@ private:
DataLayout *DL;
/// Dominators.
DominatorTree *DT;
- /// Target Info.
- TargetTransformInfo *TTI;
- /// Alias Analysis.
- AliasAnalysis *AA;
/// Target Library Info.
TargetLibraryInfo *TLI;
@@ -580,6 +648,8 @@ private:
/// Notice that inductions don't need to start at zero and that induction
/// variables can be pointers.
InductionList Inductions;
+ /// Holds the widest induction type encountered.
+ Type *WidestIndTy;
/// Allowed outside users. This holds the reduction
/// vars which can be accessed from outside the loop.
@@ -592,6 +662,8 @@ private:
RuntimePointerCheck PtrRtCheck;
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
+
+ unsigned MaxSafeDepDistBytes;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -684,12 +756,140 @@ private:
const TargetLibraryInfo *TLI;
};
+/// Utility class for getting and setting loop vectorizer hints in the form
+/// of loop metadata.
+struct LoopVectorizeHints {
+ /// Vectorization width.
+ unsigned Width;
+ /// Vectorization unroll factor.
+ unsigned Unroll;
+
+ LoopVectorizeHints(const Loop *L, bool DisableUnrolling)
+ : Width(VectorizationFactor)
+ , Unroll(DisableUnrolling ? 1 : VectorizationUnroll)
+ , LoopID(L->getLoopID()) {
+ getHints(L);
+ // The command line options override any loop metadata except for when
+ // width == 1 which is used to indicate the loop is already vectorized.
+ if (VectorizationFactor.getNumOccurrences() > 0 && Width != 1)
+ Width = VectorizationFactor;
+ if (VectorizationUnroll.getNumOccurrences() > 0)
+ Unroll = VectorizationUnroll;
+
+ DEBUG(if (DisableUnrolling && Unroll == 1)
+ dbgs() << "LV: Unrolling disabled by the pass manager\n");
+ }
+
+ /// Return the loop vectorizer metadata prefix.
+ static StringRef Prefix() { return "llvm.vectorizer."; }
+
+ MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) {
+ SmallVector<Value*, 2> Vals;
+ Vals.push_back(MDString::get(Context, Name));
+ Vals.push_back(ConstantInt::get(Type::getInt32Ty(Context), V));
+ return MDNode::get(Context, Vals);
+ }
+
+ /// Mark the loop L as already vectorized by setting the width to 1.
+ void setAlreadyVectorized(Loop *L) {
+ LLVMContext &Context = L->getHeader()->getContext();
+
+ Width = 1;
+
+ // Create a new loop id with one more operand for the already_vectorized
+ // hint. If the loop already has a loop id then copy the existing operands.
+ SmallVector<Value*, 4> Vals(1);
+ if (LoopID)
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i)
+ Vals.push_back(LoopID->getOperand(i));
+
+ Vals.push_back(createHint(Context, Twine(Prefix(), "width").str(), Width));
+ Vals.push_back(createHint(Context, Twine(Prefix(), "unroll").str(), 1));
+
+ MDNode *NewLoopID = MDNode::get(Context, Vals);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+
+ L->setLoopID(NewLoopID);
+ if (LoopID)
+ LoopID->replaceAllUsesWith(NewLoopID);
+
+ LoopID = NewLoopID;
+ }
+
+private:
+ MDNode *LoopID;
+
+ /// Find hints specified in the loop metadata.
+ void getHints(const Loop *L) {
+ if (!LoopID)
+ return;
+
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ const MDString *S = 0;
+ SmallVector<Value*, 4> Args;
+
+      // The expected hint is either an MDString or an MDNode whose first
+      // operand is an MDString.
+ if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
+ if (!MD || MD->getNumOperands() == 0)
+ continue;
+ S = dyn_cast<MDString>(MD->getOperand(0));
+ for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
+ Args.push_back(MD->getOperand(i));
+ } else {
+ S = dyn_cast<MDString>(LoopID->getOperand(i));
+ assert(Args.size() == 0 && "too many arguments for MDString");
+ }
+
+ if (!S)
+ continue;
+
+ // Check if the hint starts with the vectorizer prefix.
+ StringRef Hint = S->getString();
+ if (!Hint.startswith(Prefix()))
+ continue;
+ // Remove the prefix.
+ Hint = Hint.substr(Prefix().size(), StringRef::npos);
+
+ if (Args.size() == 1)
+ getHint(Hint, Args[0]);
+ }
+ }
+
+ // Check string hint with one operand.
+ void getHint(StringRef Hint, Value *Arg) {
+ const ConstantInt *C = dyn_cast<ConstantInt>(Arg);
+ if (!C) return;
+ unsigned Val = C->getZExtValue();
+
+ if (Hint == "width") {
+ if (isPowerOf2_32(Val) && Val <= MaxVectorWidth)
+ Width = Val;
+ else
+ DEBUG(dbgs() << "LV: ignoring invalid width hint metadata\n");
+ } else if (Hint == "unroll") {
+ if (isPowerOf2_32(Val) && Val <= MaxUnrollFactor)
+ Unroll = Val;
+ else
+ DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n");
+ } else {
+ DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n');
+ }
+ }
+};
+
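
Concretely, getHints() reads self-referential loop metadata of this shape
(3.4-era textual IR; the values are examples), and setAlreadyVectorized()
appends a width hint of 1 to the same list:

    br i1 %done, label %exit, label %loop.body, !llvm.loop !0
    ...
    !0 = metadata !{metadata !0, metadata !1, metadata !2}
    !1 = metadata !{metadata !"llvm.vectorizer.width", i32 4}
    !2 = metadata !{metadata !"llvm.vectorizer.unroll", i32 2}

Per getHint(), both values must be powers of two, with width capped at
MaxVectorWidth and unroll at MaxUnrollFactor.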
/// The LoopVectorize Pass.
struct LoopVectorize : public LoopPass {
/// Pass identification, replacement for typeid
static char ID;
- explicit LoopVectorize() : LoopPass(ID) {
+ explicit LoopVectorize(bool NoUnrolling = false)
+ : LoopPass(ID), DisableUnrolling(NoUnrolling) {
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
}
@@ -698,8 +898,8 @@ struct LoopVectorize : public LoopPass {
LoopInfo *LI;
TargetTransformInfo *TTI;
DominatorTree *DT;
- AliasAnalysis *AA;
TargetLibraryInfo *TLI;
+ bool DisableUnrolling;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
// We only vectorize innermost loops.
@@ -711,19 +911,30 @@ struct LoopVectorize : public LoopPass {
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
DT = &getAnalysis<DominatorTree>();
- AA = getAnalysisIfAvailable<AliasAnalysis>();
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ // If the target claims to have no vector registers don't attempt
+ // vectorization.
+ if (!TTI->getNumberOfRegisters(true))
+ return false;
+
if (DL == NULL) {
- DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout");
+ DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout\n");
return false;
}
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
+ LoopVectorizeHints Hints(L, DisableUnrolling);
+
+ if (Hints.Width == 1 && Hints.Unroll == 1) {
+ DEBUG(dbgs() << "LV: Not vectorizing.\n");
+ return false;
+ }
+
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA, TLI);
+ LoopVectorizationLegality LVL(L, SE, DL, DT, TLI);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing.\n");
return false;
@@ -749,23 +960,30 @@ struct LoopVectorize : public LoopPass {
// Select the optimal vectorization factor.
LoopVectorizationCostModel::VectorizationFactor VF;
- VF = CM.selectVectorizationFactor(OptForSize, VectorizationFactor);
+ VF = CM.selectVectorizationFactor(OptForSize, Hints.Width);
// Select the unroll factor.
- unsigned UF = CM.selectUnrollFactor(OptForSize, VectorizationUnroll,
- VF.Width, VF.Cost);
+ unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width,
+ VF.Cost);
+
+ DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<<
+ F->getParent()->getModuleIdentifier() << '\n');
+ DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n');
if (VF.Width == 1) {
DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
- return false;
+ if (UF == 1)
+ return false;
+ // We decided not to vectorize, but we may want to unroll.
+ InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
+ Unroller.vectorize(&LVL);
+ } else {
+ // If we decided that it is *legal* to vectorize the loop then do it.
+ InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
+ LB.vectorize(&LVL);
}
- DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<<
- F->getParent()->getModuleIdentifier()<<"\n");
- DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n");
-
- // If we decided that it is *legal* to vectorize the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
- LB.vectorize(&LVL);
+ // Mark the loop as already vectorized to avoid vectorizing again.
+ Hints.setAlreadyVectorized(L);
DEBUG(verifyFunction(*L->getHeader()->getParent()));
return true;
@@ -795,38 +1013,34 @@ struct LoopVectorize : public LoopPass {
void
LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
Loop *Lp, Value *Ptr,
- bool WritePtr) {
+ bool WritePtr,
+ unsigned DepSetId) {
const SCEV *Sc = SE->getSCEV(Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
- const SCEV *Ex = SE->getExitCount(Lp, Lp->getLoopLatch());
+ const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
Pointers.push_back(Ptr);
Starts.push_back(AR->getStart());
Ends.push_back(ScEnd);
IsWritePtr.push_back(WritePtr);
+ DependencySetId.push_back(DepSetId);
}
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
- // Save the current insertion location.
- Instruction *Loc = Builder.GetInsertPoint();
-
// We need to place the broadcast of invariant variables outside the loop.
Instruction *Instr = dyn_cast<Instruction>(V);
bool NewInstr = (Instr && Instr->getParent() == LoopVectorBody);
bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;
// Place the code for broadcasting invariant variables in the new preheader.
+ IRBuilder<>::InsertPointGuard Guard(Builder);
if (Invariant)
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
// Broadcast the scalar into all locations in the vector.
Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
- // Restore the builder insertion point.
- if (Invariant)
- Builder.SetInsertPoint(Loc);
-
return Shuf;
}
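
InsertPointGuard is a plain RAII saver; a toy stand-in (no LLVM types, purely
illustrative) of the save/restore behaviour that replaces the manual
SetInsertPoint dance deleted above:

    #include <cstdio>

    struct GuardLike {        // stands in for IRBuilder<>::InsertPointGuard
      int &Slot;
      int Saved;
      GuardLike(int &S) : Slot(S), Saved(S) {}
      ~GuardLike() { Slot = Saved; }
    };

    int main() {
      int InsertPoint = 1;
      {
        GuardLike G(InsertPoint);
        InsertPoint = 99;     // temporarily redirected, e.g. to a preheader
      }
      std::printf("%d\n", InsertPoint); // prints 1: restored on scope exit
      return 0;
    }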
@@ -853,10 +1067,35 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
return Builder.CreateAdd(Val, Cv, "induction");
}
+/// \brief Find the operand of the GEP that should be checked for consecutive
+/// accesses. This ignores trailing indices that have no effect on the final
+/// pointer.
+static unsigned getGEPInductionOperand(DataLayout *DL,
+ const GetElementPtrInst *Gep) {
+ unsigned LastOperand = Gep->getNumOperands() - 1;
+ unsigned GEPAllocSize = DL->getTypeAllocSize(
+ cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
+
+ // Walk backwards and try to peel off zeros.
+ while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
+ // Find the type we're currently indexing into.
+ gep_type_iterator GEPTI = gep_type_begin(Gep);
+ std::advance(GEPTI, LastOperand - 1);
+
+ // If it's a type with the same allocation size as the result of the GEP we
+ // can peel off the zero index.
+ if (DL->getTypeAllocSize(*GEPTI) != GEPAllocSize)
+ break;
+ --LastOperand;
+ }
+
+ return LastOperand;
+}
+
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr");
   // Make sure that the pointer does not point to an aggregate type.
- if (cast<PointerType>(Ptr->getType())->getElementType()->isAggregateType())
+ if (Ptr->getType()->getPointerElementType()->isAggregateType())
return 0;
// If this value is a pointer induction variable we know it is consecutive.
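
A worked example of the trailing-zero peeling feeding this analysis (old-style
textual IR, types illustrative):

    %a = getelementptr { float }* %base, i64 %i, i32 0
    %b = getelementptr [4 x float]* %base, i64 %i, i32 0

For %a the zero steps into a single-float struct whose alloc size equals the
GEP's float result, so getGEPInductionOperand() peels it and %i becomes the
operand checked below. For %b the zero indexes a 16-byte array row, the sizes
differ, so the zero is kept as the checked operand and the GEP is rejected:
%i would then have to be loop-invariant, and it is not.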
@@ -874,8 +1113,6 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
return 0;
unsigned NumOperands = Gep->getNumOperands();
- Value *LastIndex = Gep->getOperand(NumOperands - 1);
-
Value *GpPtr = Gep->getPointerOperand();
// If this GEP value is a consecutive pointer induction variable and all of
// the indices are constant then we know it is consecutive. We can
@@ -899,14 +1136,18 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
return -1;
}
- // Check that all of the gep indices are uniform except for the last.
- for (unsigned i = 0; i < NumOperands - 1; ++i)
- if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
+
+ // Check that all of the gep indices are uniform except for our induction
+ // operand.
+ for (unsigned i = 0; i != NumOperands; ++i)
+ if (i != InductionOperand &&
+ !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
return 0;
- // We can emit wide load/stores only if the last index is the induction
- // variable.
- const SCEV *Last = SE->getSCEV(LastIndex);
+ // We can emit wide load/stores only if the last non-zero index is the
+ // induction variable.
+ const SCEV *Last = SE->getSCEV(Gep->getOperand(InductionOperand));
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
const SCEV *Step = AR->getStepRecurrence(*SE);
@@ -964,7 +1205,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Type *DataTy = VectorType::get(ScalarDataTy, VF);
Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
-
+  // An alignment of 0 means target ABI alignment. We need to use the scalar's
+  // target ABI alignment in such a case.
+ if (!Alignment)
+ Alignment = DL->getABITypeAlignment(ScalarDataTy);
+ unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
@@ -985,6 +1230,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
// Handle consecutive loads/stores.
GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
+ setDebugLocFromInst(Builder, Gep);
Value *PtrOperand = Gep->getPointerOperand();
Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero);
@@ -995,26 +1241,40 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Gep2->setName("gep.indvar.base");
Ptr = Builder.Insert(Gep2);
} else if (Gep) {
+ setDebugLocFromInst(Builder, Gep);
assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()),
OrigLoop) && "Base ptr must be invariant");
// The last index does not have to be the induction. It can be
// consecutive and be a function of the index. For example A[I+1];
unsigned NumOperands = Gep->getNumOperands();
-
- Value *LastGepOperand = Gep->getOperand(NumOperands - 1);
- VectorParts &GEPParts = getVectorValue(LastGepOperand);
- Value *LastIndex = GEPParts[0];
- LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
-
+ unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
// Create the new GEP with the new induction variable.
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
- Gep2->setOperand(NumOperands - 1, LastIndex);
- Gep2->setName("gep.indvar.idx");
+
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Value *GepOperand = Gep->getOperand(i);
+ Instruction *GepOperandInst = dyn_cast<Instruction>(GepOperand);
+
+ // Update last index or loop invariant instruction anchored in loop.
+ if (i == InductionOperand ||
+ (GepOperandInst && OrigLoop->contains(GepOperandInst))) {
+ assert((i == InductionOperand ||
+ SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) &&
+ "Must be last index or loop invariant");
+
+ VectorParts &GEPParts = getVectorValue(GepOperand);
+ Value *Index = GEPParts[0];
+ Index = Builder.CreateExtractElement(Index, Zero);
+ Gep2->setOperand(i, Index);
+ Gep2->setName("gep.indvar.idx");
+ }
+ }
Ptr = Builder.Insert(Gep2);
} else {
// Use the induction element ptr.
assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+ setDebugLocFromInst(Builder, Ptr);
VectorParts &PtrVal = getVectorValue(Ptr);
Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
}
@@ -1023,8 +1283,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
if (SI) {
assert(!Legal->isUniform(SI->getPointerOperand()) &&
"We do not allow storing to uniform addresses");
+ setDebugLocFromInst(Builder, SI);
+ // We don't want to update the value in the map as it might be used in
+ // another expression. So don't use a reference type for "StoredVal".
+ VectorParts StoredVal = getVectorValue(SI->getValueOperand());
- VectorParts &StoredVal = getVectorValue(SI->getValueOperand());
for (unsigned Part = 0; Part < UF; ++Part) {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
@@ -1039,11 +1302,16 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
}
- Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo());
+ Value *VecPtr = Builder.CreateBitCast(PartPtr,
+ DataTy->getPointerTo(AddressSpace));
Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment);
}
+ return;
}
+ // Handle loads.
+ assert(LI && "Must have a load instruction");
+ setDebugLocFromInst(Builder, LI);
for (unsigned Part = 0; Part < UF; ++Part) {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
@@ -1055,7 +1323,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
}
- Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo());
+ Value *VecPtr = Builder.CreateBitCast(PartPtr,
+ DataTy->getPointerTo(AddressSpace));
Value *LI = Builder.CreateLoad(VecPtr, "wide.load");
cast<LoadInst>(LI)->setAlignment(Alignment);
Entry[Part] = Reverse ? reverseVector(LI) : LI;
@@ -1067,6 +1336,8 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
// Holds vector parameters or scalars, in case of uniform vals.
SmallVector<VectorParts, 4> Params;
+ setDebugLocFromInst(Builder, Instr);
+
// Find all of the vectorized parameters.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
Value *SrcOp = Instr->getOperand(op);
@@ -1112,7 +1383,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
Instruction *Cloned = Instr->clone();
if (!IsVoidRetTy)
Cloned->setName(Instr->getName() + ".cloned");
- // Replace the operands of the cloned instrucions with extracted scalars.
+ // Replace the operands of the cloned instructions with extracted scalars.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
Value *Op = Params[op][Part];
// Param is a vector. Need to extract the right lane.
@@ -1142,16 +1413,13 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
if (!PtrRtCheck->Need)
return NULL;
- Instruction *MemoryRuntimeCheck = 0;
unsigned NumPointers = PtrRtCheck->Pointers.size();
- SmallVector<Value* , 2> Starts;
- SmallVector<Value* , 2> Ends;
+ SmallVector<TrackingVH<Value> , 2> Starts;
+ SmallVector<TrackingVH<Value> , 2> Ends;
+ LLVMContext &Ctx = Loc->getContext();
SCEVExpander Exp(*SE, "induction");
- // Use this type for pointer arithmetic.
- Type* PtrArithTy = Type::getInt8PtrTy(Loc->getContext(), 0);
-
for (unsigned i = 0; i < NumPointers; ++i) {
Value *Ptr = PtrRtCheck->Pointers[i];
const SCEV *Sc = SE->getSCEV(Ptr);
@@ -1162,7 +1430,11 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
Starts.push_back(Ptr);
Ends.push_back(Ptr);
} else {
- DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
+ DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n');
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc);
Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
@@ -1172,17 +1444,32 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
}
IRBuilder<> ChkBuilder(Loc);
-
+ // Our instructions might fold to a constant.
+ Value *MemoryRuntimeCheck = 0;
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i+1; j < NumPointers; ++j) {
// No need to check if two readonly pointers intersect.
if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j])
continue;
- Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc");
- Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc");
- Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc");
- Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy, "bc");
+ // Only need to check pointers between two different dependency sets.
+ if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j])
+ continue;
+
+ unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
+ unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
+
+ assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
+ (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
+ "Trying to bounds check pointers with different address spaces");
+
+ Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+ Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+ Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
@@ -1190,12 +1477,17 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
if (MemoryRuntimeCheck)
IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
"conflict.rdx");
-
- MemoryRuntimeCheck = cast<Instruction>(IsConflict);
+ MemoryRuntimeCheck = IsConflict;
}
}
- return MemoryRuntimeCheck;
+ // We have to do this trickery because the IRBuilder might fold the check to
+ // a constant expression, in which case there is no Instruction anchored in
+ // the block.
+ Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
+ ConstantInt::getTrue(Ctx));
+ ChkBuilder.Insert(Check, "memcheck.conflict");
+ return Check;
}
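// Sketch of the folding hazard handled above (constants assumed): with
// foldable operands, ChkBuilder.CreateOr can return a bare Constant, e.g.
//   Value *C = ChkBuilder.CreateOr(ConstantInt::getTrue(Ctx),
//                                  ConstantInt::getFalse(Ctx)); // ConstantInt
// which is not an Instruction in the block; and'ing with 'true' through
// BinaryOperator::CreateAnd always materializes a real Instruction that the
// later branch rewriting can split the block at.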
void
@@ -1234,23 +1526,27 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
assert(ExitBlock && "Must have an exit block");
- // Mark the old scalar loop with metadata that tells us not to vectorize this
- // loop again if we run into it.
- MDNode *MD = MDNode::get(OldBasicBlock->getContext(), None);
- OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD);
-
// Some loops have a single integer induction variable, while other loops
// don't. One example is c++ iterators that often have multiple pointer
// induction variables. In the code below we also support a case where we
// don't have a single induction variable.
OldInduction = Legal->getInduction();
- Type *IdxTy = OldInduction ? OldInduction->getType() :
- DL->getIntPtrType(SE->getContext());
+ Type *IdxTy = Legal->getWidestInductionType();
// Find the loop boundaries.
- const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch());
+ const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop);
assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
+ // The exit count might have type i64 while the phi is i32. This can
+ // happen if we have an induction variable that is sign extended before the
+ // compare. The only way that we get a backedge-taken count is if the
+ // induction variable was signed and as such will not overflow. In such a
+ // case truncation is legal.
+ if (ExitCount->getType()->getPrimitiveSizeInBits() >
+ IdxTy->getPrimitiveSizeInBits())
+ ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy);
+
+ ExitCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy);
// Get the total trip count from the count by adding 1.
ExitCount = SE->getAddExpr(ExitCount,
SE->getConstant(ExitCount->getType(), 1));
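// Worked example (illustrative): for a loop 'for (i32 i = 0; i != 8; ++i)'
// the backedge-taken count is 7, so the trip count formed here is 7 + 1 = 8;
// if the widest induction type IdxTy is i64 and the count is i32, the
// getNoopOrZeroExtend above first widens the count so the add is done in
// IdxTy.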
@@ -1266,9 +1562,11 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// The loop index does not have to start at Zero. Find the original start
// value from the induction PHI node. If we don't have an induction variable
// then we know that it starts at zero.
- Value *StartIdx = OldInduction ?
- OldInduction->getIncomingValueForBlock(BypassBlock):
- ConstantInt::get(IdxTy, 0);
+ Builder.SetInsertPoint(BypassBlock->getTerminator());
+ Value *StartIdx = ExtendedIdx = OldInduction ?
+ Builder.CreateZExt(OldInduction->getIncomingValueForBlock(BypassBlock),
+ IdxTy):
+ ConstantInt::get(IdxTy, 0);
assert(BypassBlock && "Invalid loop structure");
LoopBypassBlocks.push_back(BypassBlock);
@@ -1283,11 +1581,28 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
BasicBlock *ScalarPH =
MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph");
+ // Create and register the new vector loop.
+ Loop* Lp = new Loop();
+ Loop *ParentLoop = OrigLoop->getParentLoop();
+
+ // Insert the new loop into the loop nest and register the new basic blocks
+ // before calling any utilities such as SCEV that require valid LoopInfo.
+ if (ParentLoop) {
+ ParentLoop->addChildLoop(Lp);
+ ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase());
+ } else {
+ LI->addTopLevelLoop(Lp);
+ }
+ Lp->addBasicBlockToLoop(VecBody, LI->getBase());
+
// Use this IR builder to create the loop instructions (Phi, Br, Cmp)
// inside the loop.
- Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
+ Builder.SetInsertPoint(VecBody->getFirstNonPHI());
// Generate the induction variable.
+ setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction));
Induction = Builder.CreatePHI(IdxTy, 2, "index");
// The loop step is equal to the vectorization factor (num of SIMD elements)
// times the unroll factor (num of SIMD instructions).
@@ -1296,6 +1611,8 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// This is the IR builder that we use to add all of the logic for bypassing
// the new vector loop.
IRBuilder<> BypassBuilder(BypassBlock->getTerminator());
+ setDebugLocFromInst(BypassBuilder,
+ getDebugLocFromInstOrOperands(OldInduction));
// We may need to extend the index in case there is a type mismatch.
// We know that the count starts at zero and does not overflow.
@@ -1334,6 +1651,8 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// Create a new block containing the memory check.
BasicBlock *CheckBlock = BypassBlock->splitBasicBlock(MemRuntimeCheck,
"vector.memcheck");
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
LoopBypassBlocks.push_back(CheckBlock);
// Replace the branch into the memory check block with a conditional branch
@@ -1362,76 +1681,101 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
PHINode *ResumeIndex = 0;
LoopVectorizationLegality::InductionList::iterator I, E;
LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
+ // Set builder to point to last bypass block.
+ BypassBuilder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator());
for (I = List->begin(), E = List->end(); I != E; ++I) {
PHINode *OrigPhi = I->first;
LoopVectorizationLegality::InductionInfo II = I->second;
- PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val",
+
+ Type *ResumeValTy = (OrigPhi == OldInduction) ? IdxTy : OrigPhi->getType();
+ PHINode *ResumeVal = PHINode::Create(ResumeValTy, 2, "resume.val",
MiddleBlock->getTerminator());
+ // We might have extended the type of the induction variable but we need a
+ // truncated version for the scalar loop.
+ PHINode *TruncResumeVal = (OrigPhi == OldInduction) ?
+ PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val",
+ MiddleBlock->getTerminator()) : 0;
+
Value *EndValue = 0;
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
case LoopVectorizationLegality::IK_IntInduction: {
- // Handle the integer induction counter:
+ // Handle the integer induction counter.
assert(OrigPhi->getType()->isIntegerTy() && "Invalid type");
- assert(OrigPhi == OldInduction && "Unknown integer PHI");
- // We know what the end value is.
- EndValue = IdxEndRoundDown;
- // We also know which PHI node holds it.
- ResumeIndex = ResumeVal;
+
+ // We have the canonical induction variable.
+ if (OrigPhi == OldInduction) {
+ // Create a truncated version of the resume value for the scalar loop;
+ // we might have promoted the type to a larger width.
+ EndValue =
+ BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType());
+ // The new PHI merges the original incoming value, in case of a bypass,
+ // or the value at the end of the vectorized loop.
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+ TruncResumeVal->addIncoming(EndValue, VecBody);
+
+ // We know what the end value is.
+ EndValue = IdxEndRoundDown;
+ // We also know which PHI node holds it.
+ ResumeIndex = ResumeVal;
+ break;
+ }
+
+ // Not the canonical induction variable - add the vector loop count to the
+ // start value.
+ Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
+ II.StartValue->getType(),
+ "cast.crd");
+ EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue, "ind.end");
break;
}
case LoopVectorizationLegality::IK_ReverseIntInduction: {
// Convert the CountRoundDown variable to the PHI size.
- unsigned CRDSize = CountRoundDown->getType()->getScalarSizeInBits();
- unsigned IISize = II.StartValue->getType()->getScalarSizeInBits();
- Value *CRD = CountRoundDown;
- if (CRDSize > IISize)
- CRD = CastInst::Create(Instruction::Trunc, CountRoundDown,
- II.StartValue->getType(), "tr.crd",
- LoopBypassBlocks.back()->getTerminator());
- else if (CRDSize < IISize)
- CRD = CastInst::Create(Instruction::SExt, CountRoundDown,
- II.StartValue->getType(),
- "sext.crd",
- LoopBypassBlocks.back()->getTerminator());
- // Handle reverse integer induction counter:
- EndValue =
- BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end",
- LoopBypassBlocks.back()->getTerminator());
+ Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
+ II.StartValue->getType(),
+ "cast.crd");
+ // Handle reverse integer induction counter.
+ EndValue = BypassBuilder.CreateSub(II.StartValue, CRD, "rev.ind.end");
break;
}
case LoopVectorizationLegality::IK_PtrInduction: {
// For pointer induction variables, calculate the offset using
// the end index.
- EndValue =
- GetElementPtrInst::Create(II.StartValue, CountRoundDown, "ptr.ind.end",
- LoopBypassBlocks.back()->getTerminator());
+ EndValue = BypassBuilder.CreateGEP(II.StartValue, CountRoundDown,
+ "ptr.ind.end");
break;
}
case LoopVectorizationLegality::IK_ReversePtrInduction: {
// The value at the end of the loop for the reverse pointer is calculated
// by creating a GEP with a negative index starting from the start value.
Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0);
- Value *NegIdx = BinaryOperator::CreateSub(Zero, CountRoundDown,
- "rev.ind.end",
- LoopBypassBlocks.back()->getTerminator());
- EndValue = GetElementPtrInst::Create(II.StartValue, NegIdx,
- "rev.ptr.ind.end",
- LoopBypassBlocks.back()->getTerminator());
+ Value *NegIdx = BypassBuilder.CreateSub(Zero, CountRoundDown,
+ "rev.ind.end");
+ EndValue = BypassBuilder.CreateGEP(II.StartValue, NegIdx,
+ "rev.ptr.ind.end");
break;
}
}// end of case
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
- for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
- ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) {
+ if (OrigPhi == OldInduction)
+ ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]);
+ else
+ ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+ }
ResumeVal->addIncoming(EndValue, VecBody);
// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
- OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
+ // The old induction's PHI node in the scalar body needs the truncated value.
+ if (OrigPhi == OldInduction)
+ OrigPhi->setIncomingValue(BlockIdx, TruncResumeVal);
+ else
+ OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
}
// If we are generating a new induction variable then we also need to
@@ -1476,24 +1820,6 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// Get ready to start creating new instructions into the vectorized body.
Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
- // Create and register the new vector loop.
- Loop* Lp = new Loop();
- Loop *ParentLoop = OrigLoop->getParentLoop();
-
- // Insert the new loop into the loop nest and register the new basic blocks.
- if (ParentLoop) {
- ParentLoop->addChildLoop(Lp);
- for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
- ParentLoop->addBasicBlockToLoop(LoopBypassBlocks[I], LI->getBase());
- ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase());
- ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase());
- ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase());
- } else {
- LI->addTopLevelLoop(Lp);
- }
-
- Lp->addBasicBlockToLoop(VecBody, LI->getBase());
-
// Save the state.
LoopVectorPreHeader = VectorPH;
LoopScalarPreHeader = ScalarPH;
@@ -1501,6 +1827,9 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
LoopExitBlock = ExitBlock;
LoopVectorBody = VecBody;
LoopScalarBody = OldBasicBlock;
+
+ LoopVectorizeHints Hints(Lp, true);
+ Hints.setAlreadyVectorized(Lp);
}
/// This function returns the identity element (or neutral element) for
@@ -1530,6 +1859,31 @@ LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
}
}
+static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
+ Intrinsic::ID ValidIntrinsicID) {
+ if (I.getNumArgOperands() != 1 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ !I.onlyReadsMemory())
+ return Intrinsic::not_intrinsic;
+
+ return ValidIntrinsicID;
+}
+
+static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I,
+ Intrinsic::ID ValidIntrinsicID) {
+ if (I.getNumArgOperands() != 2 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ !I.getArgOperand(1)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ I.getType() != I.getArgOperand(1)->getType() ||
+ !I.onlyReadsMemory())
+ return Intrinsic::not_intrinsic;
+
+ return ValidIntrinsicID;
+}
+
+
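// Usage sketch (assumed declaration): a call 'float r = sinf(x);' maps to
// LibFunc::sinf below and is accepted only when it really has the unary
// float signature, i.e. checkUnaryFloatSignature(*CI, Intrinsic::sin)
// verifies one FP argument, a matching return type and onlyReadsMemory();
// a mismatched declaration such as 'double sinf(int)' yields
// Intrinsic::not_intrinsic instead.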
static Intrinsic::ID
getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
// If we have an intrinsic call, check if it is trivially vectorizable.
@@ -1544,14 +1898,18 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
case Intrinsic::log10:
case Intrinsic::log2:
case Intrinsic::fabs:
+ case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
+ case Intrinsic::round:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
return II->getIntrinsicID();
default:
return Intrinsic::not_intrinsic;
@@ -1564,8 +1922,9 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
LibFunc::Func Func;
Function *F = CI->getCalledFunction();
// We're going to make assumptions on the semantics of the functions, check
- // that the target knows that it's available in this environment.
- if (!F || !TLI->getLibFunc(F->getName(), Func))
+ // that the target knows that it's available in this environment and it does
+ // not have local linkage.
+ if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func))
return Intrinsic::not_intrinsic;
// Otherwise check if we have a call to a function that can be turned into a
@@ -1576,59 +1935,67 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
case LibFunc::sin:
case LibFunc::sinf:
case LibFunc::sinl:
- return Intrinsic::sin;
+ return checkUnaryFloatSignature(*CI, Intrinsic::sin);
case LibFunc::cos:
case LibFunc::cosf:
case LibFunc::cosl:
- return Intrinsic::cos;
+ return checkUnaryFloatSignature(*CI, Intrinsic::cos);
case LibFunc::exp:
case LibFunc::expf:
case LibFunc::expl:
- return Intrinsic::exp;
+ return checkUnaryFloatSignature(*CI, Intrinsic::exp);
case LibFunc::exp2:
case LibFunc::exp2f:
case LibFunc::exp2l:
- return Intrinsic::exp2;
+ return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
case LibFunc::log:
case LibFunc::logf:
case LibFunc::logl:
- return Intrinsic::log;
+ return checkUnaryFloatSignature(*CI, Intrinsic::log);
case LibFunc::log10:
case LibFunc::log10f:
case LibFunc::log10l:
- return Intrinsic::log10;
+ return checkUnaryFloatSignature(*CI, Intrinsic::log10);
case LibFunc::log2:
case LibFunc::log2f:
case LibFunc::log2l:
- return Intrinsic::log2;
+ return checkUnaryFloatSignature(*CI, Intrinsic::log2);
case LibFunc::fabs:
case LibFunc::fabsf:
case LibFunc::fabsl:
- return Intrinsic::fabs;
+ return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ case LibFunc::copysignl:
+ return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
case LibFunc::floor:
case LibFunc::floorf:
case LibFunc::floorl:
- return Intrinsic::floor;
+ return checkUnaryFloatSignature(*CI, Intrinsic::floor);
case LibFunc::ceil:
case LibFunc::ceilf:
case LibFunc::ceill:
- return Intrinsic::ceil;
+ return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
case LibFunc::trunc:
case LibFunc::truncf:
case LibFunc::truncl:
- return Intrinsic::trunc;
+ return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
case LibFunc::rint:
case LibFunc::rintf:
case LibFunc::rintl:
- return Intrinsic::rint;
+ return checkUnaryFloatSignature(*CI, Intrinsic::rint);
case LibFunc::nearbyint:
case LibFunc::nearbyintf:
case LibFunc::nearbyintl:
- return Intrinsic::nearbyint;
+ return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
+ case LibFunc::round:
+ case LibFunc::roundf:
+ case LibFunc::roundl:
+ return checkUnaryFloatSignature(*CI, Intrinsic::round);
case LibFunc::pow:
case LibFunc::powf:
case LibFunc::powl:
- return Intrinsic::pow;
+ return checkBinaryFloatSignature(*CI, Intrinsic::pow);
}
return Intrinsic::not_intrinsic;
@@ -1690,7 +2057,8 @@ Value *createMinMaxOp(IRBuilder<> &Builder,
}
Value *Cmp;
- if (RK == LoopVectorizationLegality::MRK_FloatMin || RK == LoopVectorizationLegality::MRK_FloatMax)
+ if (RK == LoopVectorizationLegality::MRK_FloatMin ||
+ RK == LoopVectorizationLegality::MRK_FloatMax)
Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
else
Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
@@ -1699,6 +2067,54 @@ Value *createMinMaxOp(IRBuilder<> &Builder,
return Select;
}
+namespace {
+struct CSEDenseMapInfo {
+ static bool canHandle(Instruction *I) {
+ return isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I) || isa<GetElementPtrInst>(I);
+ }
+ static inline Instruction *getEmptyKey() {
+ return DenseMapInfo<Instruction *>::getEmptyKey();
+ }
+ static inline Instruction *getTombstoneKey() {
+ return DenseMapInfo<Instruction *>::getTombstoneKey();
+ }
+ static unsigned getHashValue(Instruction *I) {
+ assert(canHandle(I) && "Unknown instruction!");
+ return hash_combine(I->getOpcode(), hash_combine_range(I->value_op_begin(),
+ I->value_op_end()));
+ }
+ static bool isEqual(Instruction *LHS, Instruction *RHS) {
+ if (LHS == getEmptyKey() || RHS == getEmptyKey() ||
+ LHS == getTombstoneKey() || RHS == getTombstoneKey())
+ return LHS == RHS;
+ return LHS->isIdenticalTo(RHS);
+ }
+};
+}
+
+/// \brief Perform CSE of induction variable instructions.
+static void cse(BasicBlock *BB) {
+ // Perform simple CSE.
+ SmallDenseMap<Instruction *, Instruction *, 4, CSEDenseMapInfo> CSEMap;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+ Instruction *In = I++;
+
+ if (!CSEDenseMapInfo::canHandle(In))
+ continue;
+
+ // Check if we can replace this instruction with any of the
+ // visited instructions.
+ if (Instruction *V = CSEMap.lookup(In)) {
+ In->replaceAllUsesWith(V);
+ In->eraseFromParent();
+ continue;
+ }
+
+ CSEMap[In] = In;
+ }
+}
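// Illustrative effect (example IR assumed): two identical broadcasts
//   %b0 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
//   %b1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
// hash alike (opcode plus operands) and compare equal via isIdenticalTo, so
// %b1 is replaced by %b0 and erased; only the insert/extract/shuffle/GEP
// instructions admitted by canHandle take part in this CSE.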
+
void
InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
//===------------------------------------------------===//
@@ -1750,6 +2166,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
LoopVectorizationLegality::ReductionDescriptor RdxDesc =
(*Legal->getReductionVars())[RdxPhi];
+ setDebugLocFromInst(Builder, RdxDesc.StartValue);
+
// We need to generate a reduction vector from the incoming scalar.
// To do so, we need to generate the 'identity' vector and override
// one of the elements with the incoming scalar reduction. We need
@@ -1767,18 +2185,31 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
// MinMax reductions have the start value as their identity.
- VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
- "minmax.ident");
+ if (VF == 1) {
+ VectorStart = Identity = RdxDesc.StartValue;
+ } else {
+ VectorStart = Identity = Builder.CreateVectorSplat(VF,
+ RdxDesc.StartValue,
+ "minmax.ident");
+ }
} else {
+ // Handle other reduction kinds:
Constant *Iden =
- LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
- VecTy->getScalarType());
- Identity = ConstantVector::getSplat(VF, Iden);
-
- // This vector is the Identity vector where the first element is the
- // incoming scalar reduction.
- VectorStart = Builder.CreateInsertElement(Identity,
- RdxDesc.StartValue, Zero);
+ LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
+ VecTy->getScalarType());
+ if (VF == 1) {
+ Identity = Iden;
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ VectorStart = RdxDesc.StartValue;
+ } else {
+ Identity = ConstantVector::getSplat(VF, Iden);
+
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ VectorStart = Builder.CreateInsertElement(Identity,
+ RdxDesc.StartValue, Zero);
+ }
}
// Fix the vector-loop phi.
@@ -1793,7 +2224,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
Value *LoopVal = RdxPhi->getIncomingValueForBlock(Latch);
VectorParts &Val = getVectorValue(LoopVal);
for (unsigned part = 0; part < UF; ++part) {
- // Make sure to add the reduction stat value only to the
+ // Make sure to add the reduction start value only to the
// first unroll part.
Value *StartVal = (part == 0) ? VectorStart : Identity;
cast<PHINode>(VecRdxPhi[part])->addIncoming(StartVal, VecPreheader);
@@ -1807,6 +2238,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
VectorParts RdxParts;
+ setDebugLocFromInst(Builder, RdxDesc.LoopExitInstr);
for (unsigned part = 0; part < UF; ++part) {
// This PHINode contains the vectorized reduction variable, or
// the initial value vector, if we bypass the vector loop.
@@ -1822,6 +2254,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = RdxParts[0];
unsigned Op = getReductionBinOp(RdxDesc.Kind);
+ setDebugLocFromInst(Builder, ReducedPartRdx);
for (unsigned part = 1; part < UF; ++part) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
@@ -1832,37 +2265,40 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
ReducedPartRdx, RdxParts[part]);
}
- // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
- // and vector ops, reducing the set of values being computed by half each
- // round.
- assert(isPowerOf2_32(VF) &&
- "Reduction emission only supported for pow2 vectors!");
- Value *TmpVec = ReducedPartRdx;
- SmallVector<Constant*, 32> ShuffleMask(VF, 0);
- for (unsigned i = VF; i != 1; i >>= 1) {
- // Move the upper half of the vector to the lower half.
- for (unsigned j = 0; j != i/2; ++j)
- ShuffleMask[j] = Builder.getInt32(i/2 + j);
-
- // Fill the rest of the mask with undef.
- std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
- UndefValue::get(Builder.getInt32Ty()));
-
- Value *Shuf =
+ if (VF > 1) {
+ // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
+ // and vector ops, reducing the set of values being computed by half each
+ // round.
+ assert(isPowerOf2_32(VF) &&
+ "Reduction emission only supported for pow2 vectors!");
+ Value *TmpVec = ReducedPartRdx;
+ SmallVector<Constant*, 32> ShuffleMask(VF, 0);
+ for (unsigned i = VF; i != 1; i >>= 1) {
+ // Move the upper half of the vector to the lower half.
+ for (unsigned j = 0; j != i/2; ++j)
+ ShuffleMask[j] = Builder.getInt32(i/2 + j);
+
+ // Fill the rest of the mask with undef.
+ std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
+ UndefValue::get(Builder.getInt32Ty()));
+
+ Value *Shuf =
Builder.CreateShuffleVector(TmpVec,
UndefValue::get(TmpVec->getType()),
ConstantVector::get(ShuffleMask),
"rdx.shuf");
- if (Op != Instruction::ICmp && Op != Instruction::FCmp)
- TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
- "bin.rdx");
- else
- TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
- }
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+ TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+ "bin.rdx");
+ else
+ TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
+ }
- // The result is in the first element of the vector.
- Value *Scalar0 = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+ // The result is in the first element of the vector.
+ ReducedPartRdx = Builder.CreateExtractElement(TmpVec,
+ Builder.getInt32(0));
+ }
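// Worked example for VF = 4 (illustrative): the loop above emits
//   %s1 = shufflevector %rdx, undef, <2, 3, undef, undef>  ; upper half down
//   %r1 = add %rdx, %s1
//   %s2 = shufflevector %r1, undef, <1, undef, undef, undef>
//   %r2 = add %r1, %s2
// i.e. log2(4) = 2 shuffle/op rounds, leaving the reduced value in lane 0
// for the extractelement below.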
// Now, we need to fix the users of the reduction variable
// inside and outside of the scalar remainder loop.
@@ -1871,7 +2307,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
- if (!LCSSAPhi) continue;
+ if (!LCSSAPhi) break;
// All PHINodes need to have a single entry edge, or two if
// we already fixed them.
@@ -1881,7 +2317,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// incoming bypass edge.
if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
// Add an edge coming from the bypass.
- LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock);
+ LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
break;
}
}// end of the LCSSA phi scan.
@@ -1893,29 +2329,38 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
// Pick the other block.
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
- (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
+ (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
}// end of for each redux variable.
- // The Loop exit block may have single value PHI nodes where the incoming
- // value is 'undef'. While vectorizing we only handled real values that
- // were defined inside the loop. Here we handle the 'undef case'.
- // See PR14725.
+ fixLCSSAPHIs();
+
+ // Remove redundant induction instructions.
+ cse(LoopVectorBody);
+}
+
+void InnerLoopVectorizer::fixLCSSAPHIs() {
for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
- if (!LCSSAPhi) continue;
+ if (!LCSSAPhi) break;
if (LCSSAPhi->getNumIncomingValues() == 1)
LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()),
LoopMiddleBlock);
}
-}
+}
InnerLoopVectorizer::VectorParts
InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
assert(std::find(pred_begin(Dst), pred_end(Dst), Src) != pred_end(Dst) &&
"Invalid edge");
+ // Look for cached value.
+ std::pair<BasicBlock*, BasicBlock*> Edge(Src, Dst);
+ EdgeMaskCache::iterator ECEntryIt = MaskCache.find(Edge);
+ if (ECEntryIt != MaskCache.end())
+ return ECEntryIt->second;
+
VectorParts SrcMask = createBlockInMask(Src);
// The terminator has to be a branch inst!
@@ -1931,9 +2376,12 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
for (unsigned part = 0; part < UF; ++part)
EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]);
+
+ MaskCache[Edge] = EdgeMask;
return EdgeMask;
}
+ MaskCache[Edge] = SrcMask;
return SrcMask;
}
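// Caching sketch (illustrative): createBlockInMask recurses through
// createEdgeMask for every predecessor edge, so repeated diamonds in the
// if-converted CFG would otherwise recompute the same masks exponentially
// many times; keying MaskCache on the (Src, Dst) pair makes each edge mask
// a one-time computation, and a cache hit returns the stored VectorParts
// directly.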
@@ -1961,154 +2409,185 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
return BlockMask;
}
-void
-InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
- BasicBlock *BB, PhiVector *PV) {
- // For each instruction in the old loop.
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- VectorParts &Entry = WidenMap.get(it);
- switch (it->getOpcode()) {
- case Instruction::Br:
- // Nothing to do for PHIs and BR, since we already took care of the
- // loop control flow instructions.
- continue;
- case Instruction::PHI:{
- PHINode* P = cast<PHINode>(it);
- // Handle reduction variables:
- if (Legal->getReductionVars()->count(P)) {
- for (unsigned part = 0; part < UF; ++part) {
- // This is phase one of vectorizing PHIs.
- Type *VecTy = VectorType::get(it->getType(), VF);
- Entry[part] = PHINode::Create(VecTy, 2, "vec.phi",
- LoopVectorBody-> getFirstInsertionPt());
- }
- PV->push_back(P);
- continue;
- }
+void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
+ InnerLoopVectorizer::VectorParts &Entry,
+ LoopVectorizationLegality *Legal,
+ unsigned UF, unsigned VF, PhiVector *PV) {
+ PHINode* P = cast<PHINode>(PN);
+ // Handle reduction variables:
+ if (Legal->getReductionVars()->count(P)) {
+ for (unsigned part = 0; part < UF; ++part) {
+ // This is phase one of vectorizing PHIs.
+ Type *VecTy = (VF == 1) ? PN->getType() :
+ VectorType::get(PN->getType(), VF);
+ Entry[part] = PHINode::Create(VecTy, 2, "vec.phi",
+ LoopVectorBody->getFirstInsertionPt());
+ }
+ PV->push_back(P);
+ return;
+ }
- // Check for PHI nodes that are lowered to vector selects.
- if (P->getParent() != OrigLoop->getHeader()) {
- // We know that all PHIs in non header blocks are converted into
- // selects, so we don't have to worry about the insertion order and we
- // can just use the builder.
- // At this point we generate the predication tree. There may be
- // duplications since this is a simple recursive scan, but future
- // optimizations will clean it up.
-
- unsigned NumIncoming = P->getNumIncomingValues();
- assert(NumIncoming > 1 && "Invalid PHI");
-
- // Generate a sequence of selects of the form:
- // SELECT(Mask3, In3,
- // SELECT(Mask2, In2,
- // ( ...)))
- for (unsigned In = 0; In < NumIncoming; In++) {
- VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
- P->getParent());
- VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
-
- for (unsigned part = 0; part < UF; ++part) {
- // We don't need to 'select' the first PHI operand because it is
- // the default value if all of the other masks don't match.
- if (In == 0)
- Entry[part] = In0[part];
- else
- // Select between the current value and the previous incoming edge
- // based on the incoming mask.
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
- Entry[part], "predphi");
- }
- }
- continue;
+ setDebugLocFromInst(Builder, P);
+ // Check for PHI nodes that are lowered to vector selects.
+ if (P->getParent() != OrigLoop->getHeader()) {
+ // We know that all PHIs in non header blocks are converted into
+ // selects, so we don't have to worry about the insertion order and we
+ // can just use the builder.
+ // At this point we generate the predication tree. There may be
+ // duplications since this is a simple recursive scan, but future
+ // optimizations will clean it up.
+
+ unsigned NumIncoming = P->getNumIncomingValues();
+
+ // Generate a sequence of selects of the form:
+ // SELECT(Mask3, In3,
+ // SELECT(Mask2, In2,
+ // ( ...)))
+ for (unsigned In = 0; In < NumIncoming; In++) {
+ VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
+ P->getParent());
+ VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
+
+ for (unsigned part = 0; part < UF; ++part) {
+ // We might have single edge PHIs (blocks) - use an identity
+ // 'select' for the first PHI operand.
+ if (In == 0)
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
+ In0[part]);
+ else
+ // Select between the current value and the previous incoming edge
+ // based on the incoming mask.
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
+ Entry[part], "predphi");
}
+ }
+ return;
+ }
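// Predication sketch (illustrative): a PHI merging three incoming edges
// lowers to the chain
//   %p0  = select %mask0, %in0, %in0        ; identity select for edge 0
//   %p1  = select %mask1, %in1, %p0
//   %phi = select %mask2, %in2, %p1
// and since exactly one edge mask is true per lane, each lane ends up with
// the value from its taken edge.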
- // This PHINode must be an induction variable.
- // Make sure that we know about it.
- assert(Legal->getInductionVars()->count(P) &&
- "Not an induction variable");
+ // This PHINode must be an induction variable.
+ // Make sure that we know about it.
+ assert(Legal->getInductionVars()->count(P) &&
+ "Not an induction variable");
- LoopVectorizationLegality::InductionInfo II =
- Legal->getInductionVars()->lookup(P);
+ LoopVectorizationLegality::InductionInfo II =
+ Legal->getInductionVars()->lookup(P);
- switch (II.IK) {
- case LoopVectorizationLegality::IK_NoInduction:
- llvm_unreachable("Unknown induction");
- case LoopVectorizationLegality::IK_IntInduction: {
- assert(P == OldInduction && "Unexpected PHI");
- Value *Broadcasted = getBroadcastInstrs(Induction);
+ switch (II.IK) {
+ case LoopVectorizationLegality::IK_NoInduction:
+ llvm_unreachable("Unknown induction");
+ case LoopVectorizationLegality::IK_IntInduction: {
+ assert(P->getType() == II.StartValue->getType() && "Types must match");
+ Type *PhiTy = P->getType();
+ Value *Broadcasted;
+ if (P == OldInduction) {
+ // Handle the canonical induction variable. We might have had to
+ // extend the type.
+ Broadcasted = Builder.CreateTrunc(Induction, PhiTy);
+ } else {
+ // Handle other induction variables that are now based on the
+ // canonical one.
+ Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
+ "normalized.idx");
+ NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
+ Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx,
+ "offset.idx");
+ }
+ Broadcasted = getBroadcastInstrs(Broadcasted);
+ // After broadcasting the induction variable we need to make the vector
+ // consecutive by adding 0, 1, 2, etc.
+ for (unsigned part = 0; part < UF; ++part)
+ Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
+ return;
+ }
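// Illustrative numbers (assumed): with a canonical IV whose zero-extended
// start is ExtendedIdx and a secondary induction 'j = j0 + i', each vector
// iteration computes
//   normalized.idx = Induction - ExtendedIdx   // 0, VF*UF, 2*VF*UF, ...
//   offset.idx     = II.StartValue + normalized.idx
// and the broadcast plus the consecutive <0, 1, 2, ...> step above widens
// that scalar into the per-part vectors.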
+ case LoopVectorizationLegality::IK_ReverseIntInduction:
+ case LoopVectorizationLegality::IK_PtrInduction:
+ case LoopVectorizationLegality::IK_ReversePtrInduction:
+ // Handle reverse integer and pointer inductions.
+ Value *StartIdx = ExtendedIdx;
+ // This is the normalized GEP that starts counting at zero.
+ Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
+ "normalized.idx");
+
+ // Handle the reverse integer induction variable case.
+ if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
+ IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
+ Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
+ "resize.norm.idx");
+ Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
+ "reverse.idx");
+
+ // This is a new value so do not hoist it out.
+ Value *Broadcasted = getBroadcastInstrs(ReverseInd);
// After broadcasting the induction variable we need to make the
- // vector consecutive by adding 0, 1, 2 ...
+ // vector consecutive by adding ... -3, -2, -1, 0.
for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
- continue;
+ Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
+ true);
+ return;
}
- case LoopVectorizationLegality::IK_ReverseIntInduction:
- case LoopVectorizationLegality::IK_PtrInduction:
- case LoopVectorizationLegality::IK_ReversePtrInduction:
- // Handle reverse integer and pointer inductions.
- Value *StartIdx = 0;
- // If we have a single integer induction variable then use it.
- // Otherwise, start counting at zero.
- if (OldInduction) {
- LoopVectorizationLegality::InductionInfo OldII =
- Legal->getInductionVars()->lookup(OldInduction);
- StartIdx = OldII.StartValue;
- } else {
- StartIdx = ConstantInt::get(Induction->getType(), 0);
- }
- // This is the normalized GEP that starts counting at zero.
- Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
- "normalized.idx");
- // Handle the reverse integer induction variable case.
- if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
- IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
- Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
- "resize.norm.idx");
- Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
- "reverse.idx");
-
- // This is a new value so do not hoist it out.
- Value *Broadcasted = getBroadcastInstrs(ReverseInd);
- // After broadcasting the induction variable we need to make the
- // vector consecutive by adding ... -3, -2, -1, 0.
- for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
- true);
+ // Handle the pointer induction variable case.
+ assert(P->getType()->isPointerTy() && "Unexpected type.");
+
+ // Is this a reverse induction ptr or a consecutive induction ptr?
+ bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
+ II.IK);
+
+ // This is the vector of results. Notice that we don't generate
+ // vector geps because scalar geps result in better code.
+ for (unsigned part = 0; part < UF; ++part) {
+ if (VF == 1) {
+ int EltIndex = (part) * (Reverse ? -1 : 1);
+ Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+ Value *GlobalIdx;
+ if (Reverse)
+ GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
+ else
+ GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
+
+ Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
+ "next.gep");
+ Entry[part] = SclrGep;
continue;
}
- // Handle the pointer induction variable case.
- assert(P->getType()->isPointerTy() && "Unexpected type.");
-
- // Is this a reverse induction ptr or a consecutive induction ptr.
- bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
- II.IK);
-
- // This is the vector of results. Notice that we don't generate
- // vector geps because scalar geps result in better code.
- for (unsigned part = 0; part < UF; ++part) {
- Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
- for (unsigned int i = 0; i < VF; ++i) {
- int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
- Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
- Value *GlobalIdx;
- if (!Reverse)
- GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
- else
- GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
-
- Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
- "next.gep");
- VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
- Builder.getInt32(i),
- "insert.gep");
- }
- Entry[part] = VecVal;
+ Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
+ for (unsigned int i = 0; i < VF; ++i) {
+ int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
+ Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+ Value *GlobalIdx;
+ if (!Reverse)
+ GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
+ else
+ GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
+
+ Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
+ "next.gep");
+ VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
+ Builder.getInt32(i),
+ "insert.gep");
}
- continue;
+ Entry[part] = VecVal;
}
+ return;
+ }
+}
+void
+InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
+ BasicBlock *BB, PhiVector *PV) {
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ VectorParts &Entry = WidenMap.get(it);
+ switch (it->getOpcode()) {
+ case Instruction::Br:
+ // Nothing to do for PHIs and BR, since we already took care of the
+ // loop control flow instructions.
+ continue;
+ case Instruction::PHI:{
+ // Vectorize PHINodes.
+ widenPHIInstruction(it, Entry, Legal, UF, VF, PV);
+ continue;
}// End of PHI.
case Instruction::Add:
@@ -2131,6 +2610,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
case Instruction::Xor: {
// Just widen binops.
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(it);
+ setDebugLocFromInst(Builder, BinOp);
VectorParts &A = getVectorValue(it->getOperand(0));
VectorParts &B = getVectorValue(it->getOperand(1));
@@ -2157,6 +2637,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// instruction with a scalar condition. Otherwise, use vector-select.
bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)),
OrigLoop);
+ setDebugLocFromInst(Builder, it);
// The condition can be loop invariant but still defined inside the
// loop. This means that we can't just use the original 'cond' value.
@@ -2165,8 +2646,10 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
VectorParts &Cond = getVectorValue(it->getOperand(0));
VectorParts &Op0 = getVectorValue(it->getOperand(1));
VectorParts &Op1 = getVectorValue(it->getOperand(2));
- Value *ScalarCond = Builder.CreateExtractElement(Cond[0],
- Builder.getInt32(0));
+
+ Value *ScalarCond = (VF == 1) ? Cond[0] :
+ Builder.CreateExtractElement(Cond[0], Builder.getInt32(0));
+
for (unsigned Part = 0; Part < UF; ++Part) {
Entry[Part] = Builder.CreateSelect(
InvariantCond ? ScalarCond : Cond[Part],
@@ -2181,6 +2664,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// Widen compares. Generate vector compares.
bool FCmp = (it->getOpcode() == Instruction::FCmp);
CmpInst *Cmp = dyn_cast<CmpInst>(it);
+ setDebugLocFromInst(Builder, it);
VectorParts &A = getVectorValue(it->getOperand(0));
VectorParts &B = getVectorValue(it->getOperand(1));
for (unsigned Part = 0; Part < UF; ++Part) {
@@ -2211,6 +2695,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
case Instruction::FPTrunc:
case Instruction::BitCast: {
CastInst *CI = dyn_cast<CastInst>(it);
+ setDebugLocFromInst(Builder, it);
/// Optimize the special case where the source is the induction
/// variable. Notice that we can only optimize the 'trunc' case
/// because: a. FP conversions lose precision, b. sext/zext may wrap,
@@ -2225,7 +2710,8 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
break;
}
/// Vectorize casts.
- Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
+ Type *DestTy = (VF == 1) ? CI->getType() :
+ VectorType::get(CI->getType(), VF);
VectorParts &A = getVectorValue(it->getOperand(0));
for (unsigned Part = 0; Part < UF; ++Part)
@@ -2237,20 +2723,32 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// Ignore dbg intrinsics.
if (isa<DbgInfoIntrinsic>(it))
break;
+ setDebugLocFromInst(Builder, it);
Module *M = BB->getParent()->getParent();
CallInst *CI = cast<CallInst>(it);
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
assert(ID && "Not an intrinsic call!");
- for (unsigned Part = 0; Part < UF; ++Part) {
- SmallVector<Value*, 4> Args;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
- VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
- Args.push_back(Arg[Part]);
+ switch (ID) {
+ case Intrinsic::lifetime_end:
+ case Intrinsic::lifetime_start:
+ scalarizeInstruction(it);
+ break;
+ default:
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ SmallVector<Value *, 4> Args;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
+ Args.push_back(Arg[Part]);
+ }
+ Type *Tys[] = {CI->getType()};
+ if (VF > 1)
+ Tys[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+
+ Function *F = Intrinsic::getDeclaration(M, ID, Tys);
+ Entry[Part] = Builder.CreateCall(F, Args);
}
- Type *Tys[] = { VectorType::get(CI->getType()->getScalarType(), VF) };
- Function *F = Intrinsic::getDeclaration(M, ID, Tys);
- Entry[Part] = Builder.CreateCall(F, Args);
+ break;
}
break;
}
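// Widening sketch (assumed example): a scalar call
//   %r = call float @llvm.fabs.f32(float %x)
// with VF = 4 takes the default path: Tys[0] becomes <4 x float>, and
// Intrinsic::getDeclaration(M, ID, Tys) yields @llvm.fabs.v4f32, called once
// per unroll part on the widened operands. lifetime.start/end, which operate
// on a scalar i8* allocation, are scalarized per lane instead.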
@@ -2283,24 +2781,65 @@ void InnerLoopVectorizer::updateAnalysis() {
DEBUG(DT->verifyAnalysis());
}
+/// \brief Check whether it is safe to if-convert this phi node.
+///
+/// Phi nodes with constant expressions that can trap are not safe to
+/// if-convert.
+static bool canIfConvertPHINodes(BasicBlock *BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ PHINode *Phi = dyn_cast<PHINode>(I);
+ if (!Phi)
+ return true;
+ for (unsigned p = 0, e = Phi->getNumIncomingValues(); p != e; ++p)
+ if (Constant *C = dyn_cast<Constant>(Phi->getIncomingValue(p)))
+ if (C->canTrap())
+ return false;
+ }
+ return true;
+}
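// Trap example (illustrative): a PHI incoming value that is the constant
// expression 'sdiv (i32 7, i32 0)' would fault if speculated, so any block
// whose PHIs carry such constants makes canIfConvertPHINodes return false
// and the loop is rejected for if-conversion rather than risking a hoisted
// trap.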
+
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!EnableIfConversion)
return false;
assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
- std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector();
+
+ // A list of pointers that we can safely read and write to.
+ SmallPtrSet<Value *, 8> SafePointes;
+
+ // Collect safe addresses.
+ for (Loop::block_iterator BI = TheLoop->block_begin(),
+ BE = TheLoop->block_end(); BI != BE; ++BI) {
+ BasicBlock *BB = *BI;
+
+ if (blockNeedsPredication(BB))
+ continue;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ SafePointes.insert(LI->getPointerOperand());
+ else if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ SafePointes.insert(SI->getPointerOperand());
+ }
+ }
// Collect the blocks that need predication.
- for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
- BasicBlock *BB = LoopBlocks[i];
+ BasicBlock *Header = TheLoop->getHeader();
+ for (Loop::block_iterator BI = TheLoop->block_begin(),
+ BE = TheLoop->block_end(); BI != BE; ++BI) {
+ BasicBlock *BB = *BI;
// We don't support switch statements inside loops.
if (!isa<BranchInst>(BB->getTerminator()))
return false;
// We must be able to predicate all blocks that need to be predicated.
- if (blockNeedsPredication(BB) && !blockCanBePredicated(BB))
+ if (blockNeedsPredication(BB)) {
+ if (!blockCanBePredicated(BB, SafePointes))
+ return false;
+ } else if (BB != Header && !canIfConvertPHINodes(BB))
return false;
+
}
// We can if-convert this loop.
@@ -2325,27 +2864,26 @@ bool LoopVectorizationLegality::canVectorize() {
if (!TheLoop->getExitingBlock())
return false;
- unsigned NumBlocks = TheLoop->getNumBlocks();
+ // We need to have a loop header.
+ DEBUG(dbgs() << "LV: Found a loop: " <<
+ TheLoop->getHeader()->getName() << '\n');
// Check if we can if-convert non single-bb loops.
+ unsigned NumBlocks = TheLoop->getNumBlocks();
if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
return false;
}
- // We need to have a loop header.
- BasicBlock *Latch = TheLoop->getLoopLatch();
- DEBUG(dbgs() << "LV: Found a loop: " <<
- TheLoop->getHeader()->getName() << "\n");
-
// ScalarEvolution needs to be able to find the exit count.
- const SCEV *ExitCount = SE->getExitCount(TheLoop, Latch);
+ const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
if (ExitCount == SE->getCouldNotCompute()) {
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
return false;
}
// Do not loop-vectorize loops with a tiny trip count.
+ BasicBlock *Latch = TheLoop->getLoopLatch();
unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch);
if (TC > 0u && TC < TinyTripCountVectorThreshold) {
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " <<
@@ -2378,6 +2916,26 @@ bool LoopVectorizationLegality::canVectorize() {
return true;
}
+static Type *convertPointerToIntegerType(DataLayout &DL, Type *Ty) {
+ if (Ty->isPointerTy())
+ return DL.getIntPtrType(Ty);
+
+ // It is possible that chars or shorts overflow when we ask for the loop's
+ // trip count, work around this by changing the type size.
+ if (Ty->getScalarSizeInBits() < 32)
+ return Type::getInt32Ty(Ty->getContext());
+
+ return Ty;
+}
+
+static Type* getWiderType(DataLayout &DL, Type *Ty0, Type *Ty1) {
+ Ty0 = convertPointerToIntegerType(DL, Ty0);
+ Ty1 = convertPointerToIntegerType(DL, Ty1);
+ if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
+ return Ty0;
+ return Ty1;
+}
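// Worked example (illustrative, 64-bit DataLayout assumed): for inductions
// 'i16 %i' and 'i8* %p', convertPointerToIntegerType maps the pointer type
// to i64 via DL.getIntPtrType and widens the sub-32-bit i16 to i32, so
// getWiderType returns i64; the new counter then runs in i64 and narrower
// IVs are truncated back at their uses.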
+
/// \brief Check that the instruction has outside loop users and is not an
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
@@ -2391,7 +2949,7 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
Instruction *U = cast<Instruction>(*I);
// This user may be a reduction exit value.
if (!TheLoop->contains(U)) {
- DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+ DEBUG(dbgs() << "LV: Found an outside user for : " << *U << '\n');
return true;
}
}
@@ -2402,13 +2960,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *PreHeader = TheLoop->getLoopPreheader();
BasicBlock *Header = TheLoop->getHeader();
- // If we marked the scalar loop as "already vectorized" then no need
- // to vectorize it again.
- if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) {
- DEBUG(dbgs() << "LV: This loop was vectorized before\n");
- return false;
- }
-
// Look for the attribute signaling the absence of NaNs.
Function &F = *Header->getParent();
if (F.hasFnAttribute("no-nans-fp-math"))
@@ -2425,10 +2976,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
++it) {
if (PHINode *Phi = dyn_cast<PHINode>(it)) {
+ Type *PhiTy = Phi->getType();
// Check that this PHI type is allowed.
- if (!Phi->getType()->isIntegerTy() &&
- !Phi->getType()->isFloatingPointTy() &&
- !Phi->getType()->isPointerTy()) {
+ if (!PhiTy->isIntegerTy() &&
+ !PhiTy->isFloatingPointTy() &&
+ !PhiTy->isPointerTy()) {
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
return false;
}
@@ -2456,17 +3008,29 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
InductionKind IK = isInductionVariable(Phi);
if (IK_NoInduction != IK) {
+ // Get the widest type.
+ if (!WidestIndTy)
+ WidestIndTy = convertPointerToIntegerType(*DL, PhiTy);
+ else
+ WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy);
+
// Int inductions are special because we only allow one IV.
if (IK == IK_IntInduction) {
- if (Induction) {
- DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
- return false;
- }
- Induction = Phi;
+ // Use the PHI node with the widest type as induction. Use the last
+ // one if there are multiple (no good reason for doing this other
+ // than that it is expedient).
+ if (!Induction || PhiTy == WidestIndTy)
+ Induction = Phi;
}
DEBUG(dbgs() << "LV: Found an induction variable.\n");
Inductions[Phi] = InductionInfo(StartValue, IK);
+
+ // Until we explicitly handle the case of an induction variable with
+ // an outside loop user we have to give up vectorizing this loop.
+ if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ return false;
+
continue;
}
@@ -2503,7 +3067,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
if (AddReductionVar(Phi, RK_FloatMinMax)) {
- DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<"\n");
+ DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<
+ "\n");
continue;
}
@@ -2520,9 +3085,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
// Check that the instruction return type is vectorizable.
- if (!VectorType::isValidElementType(it->getType()) &&
- !it->getType()->isVoidTy()) {
- DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
+ // Also, we can't vectorize extractelement instructions.
+ if ((!VectorType::isValidElementType(it->getType()) &&
+ !it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
+ DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
return false;
}
@@ -2544,7 +3110,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!Induction) {
DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
- assert(getInductionVars()->size() && "No induction variables");
+ if (Inductions.empty())
+ return false;
}
return true;
@@ -2573,59 +3140,715 @@ void LoopVectorizationLegality::collectLoopUniforms() {
Uniforms.insert(I);
// Insert all operands.
- for (int i = 0, Op = I->getNumOperands(); i < Op; ++i) {
- Worklist.push_back(I->getOperand(i));
- }
+ Worklist.insert(Worklist.end(), I->op_begin(), I->op_end());
}
}
-AliasAnalysis::Location
-LoopVectorizationLegality::getLoadStoreLocation(Instruction *Inst) {
- if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
- return AA->getLocation(Store);
- else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
- return AA->getLocation(Load);
+namespace {
+/// \brief Analyses memory accesses in a loop.
+///
+/// Checks whether run time pointer checks are needed and builds sets for data
+/// dependence checking.
+class AccessAnalysis {
+public:
+ /// \brief Read or write access location.
+ typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
+ typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
+
+ /// \brief Set of potential dependent memory accesses.
+ typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
+
+ AccessAnalysis(DataLayout *Dl, DepCandidates &DA) :
+ DL(Dl), DepCands(DA), AreAllWritesIdentified(true),
+ AreAllReadsIdentified(true), IsRTCheckNeeded(false) {}
+
+ /// \brief Register a load and whether it is only read from.
+ void addLoad(Value *Ptr, bool IsReadOnly) {
+ Accesses.insert(MemAccessInfo(Ptr, false));
+ if (IsReadOnly)
+ ReadOnlyPtr.insert(Ptr);
+ }
- llvm_unreachable("Should be either load or store instruction");
+ /// \brief Register a store.
+ void addStore(Value *Ptr) {
+ Accesses.insert(MemAccessInfo(Ptr, true));
+ }
+
+ /// \brief Check whether we can check the pointers at runtime for
+ /// non-intersection.
+ bool canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
+ unsigned &NumComparisons, ScalarEvolution *SE,
+ Loop *TheLoop, bool ShouldCheckStride = false);
+
+ /// \brief Goes over all memory accesses, checks whether a RT check is needed
+ /// and builds sets of dependent accesses.
+ void buildDependenceSets() {
+ // Process read-write pointers first.
+ processMemAccesses(false);
+ // Next, process read pointers.
+ processMemAccesses(true);
+ }
+
+ bool isRTCheckNeeded() { return IsRTCheckNeeded; }
+
+ bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
+ void resetDepChecks() { CheckDeps.clear(); }
+
+ MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
+
+private:
+ typedef SetVector<MemAccessInfo> PtrAccessSet;
+ typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
+
+ /// \brief Go over all memory accesses, or only the deferred ones if
+ /// \p UseDeferred is true and check whether runtime pointer checks are needed
+ /// and build sets of dependency check candidates.
+ void processMemAccesses(bool UseDeferred);
+
+ /// Set of all accesses.
+ PtrAccessSet Accesses;
+
+ /// Set of accesses to check after all writes have been processed.
+ PtrAccessSet DeferredAccesses;
+
+ /// Map of pointers to last access encountered.
+ UnderlyingObjToAccessMap ObjToLastAccess;
+
+ /// Set of accesses that need a further dependence check.
+ MemAccessInfoSet CheckDeps;
+
+ /// Set of pointers that are read only.
+ SmallPtrSet<Value*, 16> ReadOnlyPtr;
+
+ /// Set of underlying objects already written to.
+ SmallPtrSet<Value*, 16> WriteObjects;
+
+ DataLayout *DL;
+
+ /// Sets of potentially dependent accesses - members of one set share an
+/// underlying pointer. The set "CheckDeps" identifies which sets really need a
+ /// dependence check.
+ DepCandidates &DepCands;
+
+ bool AreAllWritesIdentified;
+ bool AreAllReadsIdentified;
+ bool IsRTCheckNeeded;
+};
+
+} // end anonymous namespace
+
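For orientation, a minimal sketch of the intended call sequence for this class (illustrative only; it mirrors how canVectorizeMemory below drives it, and assumes DL, SE, TheLoop and a RuntimePointerCheck object are in scope):

    AccessAnalysis::DepCandidates DepCands;
    AccessAnalysis Accesses(DL, DepCands);
    Accesses.addStore(StorePtr);                    // once per write pointer
    Accesses.addLoad(LoadPtr, /*IsReadOnly=*/true); // once per read pointer
    Accesses.buildDependenceSets();                 // writes first, then reads
    unsigned NumComparisons = 0;
    if (Accesses.isRTCheckNeeded())
      Accesses.canCheckPtrAtRT(RtCheck, NumComparisons, SE, TheLoop);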
+/// \brief Check whether a pointer can participate in a runtime bounds check.
+static bool hasComputableBounds(ScalarEvolution *SE, Value *Ptr) {
+ const SCEV *PtrScev = SE->getSCEV(Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+ if (!AR)
+ return false;
+
+ return AR->isAffine();
}
-bool
-LoopVectorizationLegality::hasPossibleGlobalWriteReorder(
- Value *Object,
- Instruction *Inst,
- AliasMultiMap& WriteObjects,
- unsigned MaxByteWidth) {
+/// \brief Check the stride of the pointer and ensure that it does not wrap in
+/// the address space.
+static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
+ const Loop *Lp);
+
+bool AccessAnalysis::canCheckPtrAtRT(
+ LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
+ unsigned &NumComparisons, ScalarEvolution *SE,
+ Loop *TheLoop, bool ShouldCheckStride) {
+ // Find pointers with computable bounds. We are going to use this information
+ // to place a runtime bound check.
+ unsigned NumReadPtrChecks = 0;
+ unsigned NumWritePtrChecks = 0;
+ bool CanDoRT = true;
+
+ bool IsDepCheckNeeded = isDependencyCheckNeeded();
+ // We assign a consecutive id to accesses from different dependence sets.
+ // Accesses within the same set don't need a runtime check.
+ unsigned RunningDepId = 1;
+ DenseMap<Value *, unsigned> DepSetId;
+
+ for (PtrAccessSet::iterator AI = Accesses.begin(), AE = Accesses.end();
+ AI != AE; ++AI) {
+ const MemAccessInfo &Access = *AI;
+ Value *Ptr = Access.getPointer();
+ bool IsWrite = Access.getInt();
+
+ // Just add write checks if we have both.
+ if (!IsWrite && Accesses.count(MemAccessInfo(Ptr, true)))
+ continue;
+
+ if (IsWrite)
+ ++NumWritePtrChecks;
+ else
+ ++NumReadPtrChecks;
+
+ if (hasComputableBounds(SE, Ptr) &&
+ // When we run after a failing dependency check we have to make sure we
+ // don't have wrapping pointers.
+ (!ShouldCheckStride || isStridedPtr(SE, DL, Ptr, TheLoop) == 1)) {
+ // The id of the dependence set.
+ unsigned DepId;
+
+ if (IsDepCheckNeeded) {
+ Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+ unsigned &LeaderId = DepSetId[Leader];
+ if (!LeaderId)
+ LeaderId = RunningDepId++;
+ DepId = LeaderId;
+ } else
+ // Each access has its own dependence set.
+ DepId = RunningDepId++;
+
+ RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId);
+
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
+ } else {
+ CanDoRT = false;
+ }
+ }
- AliasAnalysis::Location ThisLoc = getLoadStoreLocation(Inst);
+ if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
+ NumComparisons = 0; // Only one dependence set.
+ else {
+ NumComparisons = (NumWritePtrChecks * (NumReadPtrChecks +
+ NumWritePtrChecks - 1));
+ }
- std::vector<Instruction*>::iterator
- it = WriteObjects[Object].begin(),
- end = WriteObjects[Object].end();
+ // If the pointers that we would use for the bounds comparison have different
+ // address spaces, assume the values aren't directly comparable, so we can't
+ // use them for the runtime check. We also have to assume they could
+ // overlap. In the future there should be metadata for whether address spaces
+ // are disjoint.
+ unsigned NumPointers = RtCheck.Pointers.size();
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i + 1; j < NumPointers; ++j) {
+ // Only need to check pointers between two different dependency sets.
+ if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
+ continue;
+
+ Value *PtrI = RtCheck.Pointers[i];
+ Value *PtrJ = RtCheck.Pointers[j];
+
+ unsigned ASi = PtrI->getType()->getPointerAddressSpace();
+ unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
+ if (ASi != ASj) {
+ DEBUG(dbgs() << "LV: Runtime check would require comparison between"
+ " different address spaces\n");
+ return false;
+ }
+ }
+ }
+
+ return CanDoRT;
+}
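A worked instance of the comparison count above (numbers illustrative): with 2 write pointers and 3 read pointers spread across more than one dependence set,

    NumComparisons = NumWritePtrChecks * (NumReadPtrChecks + NumWritePtrChecks - 1)
                   = 2 * (3 + 2 - 1) = 8

while a single dependence set short-circuits to NumComparisons = 0; the caller then compares the result against RuntimeMemoryCheckThreshold.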
+
+static bool isFunctionScopeIdentifiedObject(Value *Ptr) {
+ return isNoAliasArgument(Ptr) || isNoAliasCall(Ptr) || isa<AllocaInst>(Ptr);
+}
- for (; it != end; ++it) {
- Instruction* I = *it;
- if (I == Inst)
+void AccessAnalysis::processMemAccesses(bool UseDeferred) {
+ // We process the set twice: first we process read-write pointers, then we
+ // process read-only pointers. This allows us to skip dependence tests for
+ // read-only pointers.
+
+ PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
+ for (PtrAccessSet::iterator AI = S.begin(), AE = S.end(); AI != AE; ++AI) {
+ const MemAccessInfo &Access = *AI;
+ Value *Ptr = Access.getPointer();
+ bool IsWrite = Access.getInt();
+
+ DepCands.insert(Access);
+
+ // Memorize read-only pointers for later processing and skip them in the
+ // first round (they need to be checked after we have seen all write
+ // pointers). Note: we also mark pointers that are not consecutive as
+ // "read-only" pointers (so that we check "a[b[i]] +="). Hence, we need the
+ // second check for "!IsWrite".
+ bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
+ if (!UseDeferred && IsReadOnlyPtr) {
+ DeferredAccesses.insert(Access);
continue;
+ }
+
+ bool NeedDepCheck = false;
+ // Check whether there is the possibility of a dependency because the
+ // underlying objects are the same.
+ typedef SmallVector<Value*, 16> ValueVector;
+ ValueVector TempObjects;
+ GetUnderlyingObjects(Ptr, TempObjects, DL);
+ for (ValueVector::iterator UI = TempObjects.begin(), UE = TempObjects.end();
+ UI != UE; ++UI) {
+ Value *UnderlyingObj = *UI;
+
+ // If this is a write then it needs to be an identified object. If this is a
+ // read and all writes (so far) are identified function scope objects we
+ // don't need an identified underlying object but only an Argument (the
+ // next write is going to invalidate this assumption if it is
+ // unidentified).
+ // This is a micro-optimization for the case where all writes are
+ // identified and we have one argument pointer.
+ // Otherwise, we do need a runtime check.
+ if ((IsWrite && !isFunctionScopeIdentifiedObject(UnderlyingObj)) ||
+ (!IsWrite && (!AreAllWritesIdentified ||
+ !isa<Argument>(UnderlyingObj)) &&
+ !isIdentifiedObject(UnderlyingObj))) {
+ DEBUG(dbgs() << "LV: Found an unidentified " <<
+ (IsWrite ? "write" : "read" ) << " ptr: " << *UnderlyingObj <<
+ "\n");
+ IsRTCheckNeeded = (IsRTCheckNeeded ||
+ !isIdentifiedObject(UnderlyingObj) ||
+ !AreAllReadsIdentified);
+
+ if (IsWrite)
+ AreAllWritesIdentified = false;
+ if (!IsWrite)
+ AreAllReadsIdentified = false;
+ }
+
+ // If this is a write - check other reads and writes for conflicts. If
+ // this is a read only check other writes for conflicts (but only if there
+ // is no other write to the ptr - this is an optimization to catch "a[i] =
+ // a[i] + " without having to do a dependence check).
+ if ((IsWrite || IsReadOnlyPtr) && WriteObjects.count(UnderlyingObj))
+ NeedDepCheck = true;
+
+ if (IsWrite)
+ WriteObjects.insert(UnderlyingObj);
+
+ // Create sets of pointers connected by shared underlying objects.
+ UnderlyingObjToAccessMap::iterator Prev =
+ ObjToLastAccess.find(UnderlyingObj);
+ if (Prev != ObjToLastAccess.end())
+ DepCands.unionSets(Access, Prev->second);
+
+ ObjToLastAccess[UnderlyingObj] = Access;
+ }
+
+ if (NeedDepCheck)
+ CheckDeps.insert(Access);
+ }
+}
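An illustrative source loop for the two-round scheme (not from this patch):

    void f(int *__restrict y, const int *x, int n) {
      for (int i = 0; i < n; ++i)
        y[i] = x[i] + x[i + 1];   // x is only ever read
    }

Round one registers only y's underlying object in WriteObjects; in round two the deferred read-only pointers into x find no matching write object, so they enter no dependence-check set, and with the sole write going through a noalias argument no runtime check is flagged either.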
+
+namespace {
+/// \brief Checks memory dependences among accesses to the same underlying
+/// object to determine whether vectorization is legal or not (and at
+/// which vectorization factor).
+///
+/// This class works under the assumption that we already checked that memory
+/// locations with different underlying pointers are "must-not alias".
+/// We use the ScalarEvolution framework to symbolically evaluate pairs of
+/// access functions. Since we currently don't restructure the loop we can rely
+/// on the program order of memory accesses to determine their safety.
+/// At the moment we will only deem accesses as safe for:
+/// * A negative constant distance assuming program order.
+///
+/// Safe: tmp = a[i + 1]; OR a[i + 1] = x;
+/// a[i] = tmp; y = a[i];
+///
+/// The latter case is safe because later checks guarantee that there can't
+/// be a cycle through a phi node (that is, we check that "x" and "y" are not
+/// the same variable: a header phi can only be an induction or a reduction, a
+/// reduction can't have a memory sink, an induction can't have a memory
+/// source). This is important and must not be violated (or we have to
+/// resort to checking for cycles through memory).
+///
+/// * A positive constant distance assuming program order that is bigger
+/// than the biggest memory access.
+///
+/// tmp = a[i] OR b[i] = x
+/// a[i+2] = tmp y = b[i+2];
+///
+/// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
+///
+/// * Zero distances and all accesses have the same size.
+///
+class MemoryDepChecker {
+public:
+ typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
+ typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
+
+ MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop *L)
+ : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
+ ShouldRetryWithRuntimeCheck(false) {}
+
+ /// \brief Register the location (instructions are given increasing numbers)
+ /// of a write access.
+ void addAccess(StoreInst *SI) {
+ Value *Ptr = SI->getPointerOperand();
+ Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
+ InstMap.push_back(SI);
+ ++AccessIdx;
+ }
+
+ /// \brief Register the location (instructions are given increasing numbers)
+ /// of a read access.
+ void addAccess(LoadInst *LI) {
+ Value *Ptr = LI->getPointerOperand();
+ Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
+ InstMap.push_back(LI);
+ ++AccessIdx;
+ }
+
+ /// \brief Check whether the dependencies between the accesses are safe.
+ ///
+ /// Only checks sets with elements in \p CheckDeps.
+ bool areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
+ MemAccessInfoSet &CheckDeps);
+
+ /// \brief The maximum number of bytes of a vector register we can vectorize
+ /// the accesses safely with.
+ unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
+
+ /// \brief In some cases, when the dependency check fails, we can still
+ /// vectorize the loop with a dynamic array access check.
+ bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
+
+private:
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ const Loop *InnermostLoop;
+
+ /// \brief Maps access locations (ptr, read/write) to program order.
+ DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;
+
+ /// \brief Memory access instructions in program order.
+ SmallVector<Instruction *, 16> InstMap;
+
+ /// \brief The program order index to be used for the next instruction.
+ unsigned AccessIdx;
+
+ // We can access this many bytes in parallel safely.
+ unsigned MaxSafeDepDistBytes;
+
+ /// \brief If we see a non-constant dependence distance we can still try to
+ /// vectorize this loop with runtime checks.
+ bool ShouldRetryWithRuntimeCheck;
+
+ /// \brief Check whether there is a plausible dependence between the two
+ /// accesses.
+ ///
+ /// Access \p A must happen before \p B in program order. The two indices
+ /// identify their positions in the program order map.
+ ///
+ /// This function checks whether there is a plausible dependence (or the
+ /// absence of such can't be proved) between the two accesses. If there is a
+ /// plausible dependence but the dependence distance is bigger than one
+ /// element access it records this distance in \p MaxSafeDepDistBytes (if this
+ /// distance is smaller than any other distance encountered so far).
+ /// Otherwise, this function returns true signaling a possible dependence.
+ bool isDependent(const MemAccessInfo &A, unsigned AIdx,
+ const MemAccessInfo &B, unsigned BIdx);
+
+ /// \brief Check whether the data dependence could prevent store-load
+ /// forwarding.
+ bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
+};
+
+} // end anonymous namespace
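The three cases above as self-contained C loops (illustrative only):

    void deps(int *a, int *b, int *c, int n) {
      for (int i = 0; i + 1 < n; ++i)
        a[i] = a[i + 1];     // negative distance: safe in program order
      for (int i = 0; i + 2 < n; ++i)
        b[i + 2] = b[i];     // +2 elements: safe up to a 2-wide vector
      for (int i = 0; i + 1 < n; ++i)
        c[i + 1] = c[i];     // +1 element: rejected by isDependent() below
    }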
+
+static bool isInBoundsGep(Value *Ptr) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ return GEP->isInBounds();
+ return false;
+}
- AliasAnalysis::Location ThatLoc = getLoadStoreLocation(I);
- if (AA->alias(ThisLoc.getWithNewSize(MaxByteWidth),
- ThatLoc.getWithNewSize(MaxByteWidth)))
+/// \brief Check whether the access through \p Ptr has a constant stride.
+static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
+ const Loop *Lp) {
+ const Type *Ty = Ptr->getType();
+ assert(Ty->isPointerTy() && "Unexpected non ptr");
+
+ // Make sure that the pointer does not point to aggregate types.
+ const PointerType *PtrTy = cast<PointerType>(Ty);
+ if (PtrTy->getElementType()->isAggregateType()) {
+ DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr <<
+ "\n");
+ return 0;
+ }
+
+ const SCEV *PtrScev = SE->getSCEV(Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+ if (!AR) {
+ DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer "
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // The access function must stride over the innermost loop.
+ if (Lp != AR->getLoop()) {
+ DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " <<
+ *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // The address calculation must not wrap. Otherwise, a dependence could be
+ // inverted.
+ // An inbounds getelementptr that is an AddRec with a unit stride
+ // cannot wrap by definition. The unit stride requirement is checked later.
+ // A getelementptr without an inbounds attribute but with a unit stride would
+ // have to access the pointer value "0", which is undefined behavior in
+ // address space 0; therefore we can also vectorize this case.
+ bool IsInBoundsGEP = isInBoundsGep(Ptr);
+ bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
+ bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
+ if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
+ DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space "
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // Check the step is constant.
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Calculate the pointer stride and check if it is consecutive.
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C) {
+ DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr <<
+ " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ int64_t Size = DL->getTypeAllocSize(PtrTy->getElementType());
+ const APInt &APStepVal = C->getValue()->getValue();
+
+ // Huge step value - give up.
+ if (APStepVal.getBitWidth() > 64)
+ return 0;
+
+ int64_t StepVal = APStepVal.getSExtValue();
+
+ // Strided access.
+ int64_t Stride = StepVal / Size;
+ int64_t Rem = StepVal % Size;
+ if (Rem)
+ return 0;
+
+ // If the SCEV could wrap but we have an inbounds gep with a unit stride we
+ // know we can't "wrap around the address space". In case of address space
+ // zero we know that this won't happen without triggering undefined behavior.
+ if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
+ Stride != 1 && Stride != -1)
+ return 0;
+
+ return Stride;
+}
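The tail of this function is plain integer arithmetic; with assumed values:

    // StepVal = 24, Size = 8 (double, A[3*i]) ->  Stride = 3, Rem = 0
    // StepVal = 12, Size = 8                  ->  Rem = 4, return 0 (not strided)
    // StepVal = -8, Size = 8                  ->  Stride = -1 (reverse unit stride)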
+
+bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
+ unsigned TypeByteSize) {
+ // If loads occur at a distance that is not a multiple of a feasible vector
+ // factor, store-load forwarding does not take place.
+ // Positive dependences might cause troubles because vectorizing them might
+ // prevent store-load forwarding making vectorized code run a lot slower.
+ // a[i] = a[i-3] ^ a[i-8];
+ // The stores to a[i:i+1] don't align with the loads from a[i-3:i-2] and
+ // hence, on a typical architecture, store-load forwarding does not take
+ // place. Vectorizing in such cases does not make sense.
+ // Store-load forwarding distance.
+ const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
+ // Maximum vector factor.
+ unsigned MaxVFWithoutSLForwardIssues = MaxVectorWidth*TypeByteSize;
+ if (MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
+ MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
+
+ for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
+ vf *= 2) {
+ if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
+ MaxVFWithoutSLForwardIssues = (vf >>= 1);
+ break;
+ }
+ }
+
+ if (MaxVFWithoutSLForwardIssues < 2*TypeByteSize) {
+ DEBUG(dbgs() << "LV: Distance " << Distance <<
+ " that could cause a store-load forwarding conflict\n");
+ return true;
+ }
+
+ if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
+ MaxVFWithoutSLForwardIssues != MaxVectorWidth*TypeByteSize)
+ MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
+ return false;
+}
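Tracing this with the values from the comment (i32, so TypeByteSize = 4, and a dependence distance of 3 elements = 12 bytes; MaxSafeDepDistBytes assumed still at its -1U default):

    // NumCyclesForStoreLoadThroughMemory = 8 * 4 = 32
    // first candidate vf = 2 * TypeByteSize = 8:
    //   12 % 8 != 0  and  12 / 8 = 1 < 32  ->  MaxVFWithoutSLForwardIssues = 4
    // 4 < 2 * TypeByteSize  ->  return true (forwarding conflict)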
+
+bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
+ const MemAccessInfo &B, unsigned BIdx) {
+ assert (AIdx < BIdx && "Must pass arguments in program order");
+
+ Value *APtr = A.getPointer();
+ Value *BPtr = B.getPointer();
+ bool AIsWrite = A.getInt();
+ bool BIsWrite = B.getInt();
+
+ // Two reads are independent.
+ if (!AIsWrite && !BIsWrite)
+ return false;
+
+ const SCEV *AScev = SE->getSCEV(APtr);
+ const SCEV *BScev = SE->getSCEV(BPtr);
+
+ int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop);
+ int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop);
+
+ const SCEV *Src = AScev;
+ const SCEV *Sink = BScev;
+
+ // If the induction step is negative we have to invert source and sink of the
+ // dependence.
+ if (StrideAPtr < 0) {
+ std::swap(APtr, BPtr);
+ std::swap(Src, Sink);
+ std::swap(AIsWrite, BIsWrite);
+ std::swap(AIdx, BIdx);
+ std::swap(StrideAPtr, StrideBPtr);
+ }
+
+ const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
+
+ DEBUG(dbgs() << "LV: Src Scev: " << *Src << "Sink Scev: " << *Sink
+ << "(Induction step: " << StrideAPtr << ")\n");
+ DEBUG(dbgs() << "LV: Distance for " << *InstMap[AIdx] << " to "
+ << *InstMap[BIdx] << ": " << *Dist << "\n");
+
+ // Need consecutive accesses. We don't want to vectorize
+ // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
+ // the address space.
+ if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
+ DEBUG(dbgs() << "LV: Non-consecutive pointer access\n");
+ return true;
+ }
+
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
+ if (!C) {
+ DEBUG(dbgs() << "LV: Dependence because of non constant distance\n");
+ ShouldRetryWithRuntimeCheck = true;
+ return true;
+ }
+
+ Type *ATy = APtr->getType()->getPointerElementType();
+ Type *BTy = BPtr->getType()->getPointerElementType();
+ unsigned TypeByteSize = DL->getTypeAllocSize(ATy);
+
+ // Negative distances are not plausible dependencies.
+ const APInt &Val = C->getValue()->getValue();
+ if (Val.isNegative()) {
+ bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
+ if (IsTrueDataDependence &&
+ (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
+ ATy != BTy))
return true;
+
+ DEBUG(dbgs() << "LV: Dependence is negative: NoDep\n");
+ return false;
+ }
+
+ // Write to the same location with the same size.
+ // Could be improved to assert type sizes are the same (i32 == float, etc).
+ if (Val == 0) {
+ if (ATy == BTy)
+ return false;
+ DEBUG(dbgs() << "LV: Zero dependence difference but different types\n");
+ return true;
+ }
+
+ assert(Val.isStrictlyPositive() && "Expect a positive value");
+
+ // Positive distance bigger than max vectorization factor.
+ if (ATy != BTy) {
+ DEBUG(dbgs() <<
+ "LV: ReadWrite-Write positive dependency with different types\n");
+ return false;
}
+
+ unsigned Distance = (unsigned) Val.getZExtValue();
+
+ // Bail out early if passed-in parameters make vectorization not feasible.
+ unsigned ForcedFactor = VectorizationFactor ? VectorizationFactor : 1;
+ unsigned ForcedUnroll = VectorizationUnroll ? VectorizationUnroll : 1;
+
+ // The distance must be bigger than the size needed for a vectorized version
+ // of the operation and the size of the vectorized operation must not be
+ // bigger than the current maximum size.
+ if (Distance < 2*TypeByteSize ||
+ 2*TypeByteSize > MaxSafeDepDistBytes ||
+ Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
+ DEBUG(dbgs() << "LV: Failure because of Positive distance "
+ << Val.getSExtValue() << '\n');
+ return true;
+ }
+
+ MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ?
+ Distance : MaxSafeDepDistBytes;
+
+ bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
+ if (IsTrueDataDependence &&
+ couldPreventStoreLoadForward(Distance, TypeByteSize))
+ return true;
+
+ DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() <<
+ " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
+
return false;
}
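Running the earlier b[i + 2] = b[i] example (i32) through this function, with no forced factor or unroll:

    // Dist = +8, TypeByteSize = 4:  8 < 2*4? no;  2*4 > MaxSafeDepDistBytes? no
    // -> accepted; MaxSafeDepDistBytes = 8; "Positive distance 8 with max VF = 2"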
+bool
+MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
+ MemAccessInfoSet &CheckDeps) {
+
+ MaxSafeDepDistBytes = -1U;
+ while (!CheckDeps.empty()) {
+ MemAccessInfo CurAccess = *CheckDeps.begin();
+
+ // Get the relevant memory access set.
+ EquivalenceClasses<MemAccessInfo>::iterator I =
+ AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
+
+ // Check accesses within this set.
+ EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
+ AI = AccessSets.member_begin(I), AE = AccessSets.member_end();
+
+ // Check every access pair.
+ while (AI != AE) {
+ CheckDeps.erase(*AI);
+ EquivalenceClasses<MemAccessInfo>::member_iterator OI = llvm::next(AI);
+ while (OI != AE) {
+ // Check every accessing instruction pair in program order.
+ for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
+ I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
+ for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
+ I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
+ if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2))
+ return false;
+ if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1))
+ return false;
+ }
+ ++OI;
+ }
+ ++AI;
+ }
+ }
+ return true;
+}
+
bool LoopVectorizationLegality::canVectorizeMemory() {
typedef SmallVector<Value*, 16> ValueVector;
typedef SmallPtrSet<Value*, 16> ValueSet;
+
// Holds the Load and Store *instructions*.
ValueVector Loads;
ValueVector Stores;
+
+ // Holds all the different accesses in the loop.
+ unsigned NumReads = 0;
+ unsigned NumReadWrites = 0;
+
PtrRtCheck.Pointers.clear();
PtrRtCheck.Need = false;
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
+ MemoryDepChecker DepChecker(SE, DL, TheLoop);
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
@@ -2639,6 +3862,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// but is not a load, then we quit. Notice that we don't handle function
// calls that read or write.
if (it->mayReadFromMemory()) {
+ // Many math library functions read the rounding mode. We will only
+ // vectorize a loop if it contains known function calls that don't set
+ // the flag. Therefore, it is safe to ignore this read from memory.
+ CallInst *Call = dyn_cast<CallInst>(it);
+ if (Call && getIntrinsicIDForCall(Call, TLI))
+ continue;
+
LoadInst *Ld = dyn_cast<LoadInst>(it);
if (!Ld) return false;
if (!Ld->isSimple() && !IsAnnotatedParallel) {
@@ -2646,6 +3876,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return false;
}
Loads.push_back(Ld);
+ DepChecker.addAccess(Ld);
continue;
}
@@ -2658,9 +3889,10 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return false;
}
Stores.push_back(St);
+ DepChecker.addAccess(St);
}
- } // next instr.
- } // next block.
+ } // Next instr.
+ } // Next block.
// Now we have two lists that hold the loads and the stores.
// Next, we find the pointers that they use.
@@ -2672,10 +3904,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return true;
}
- // Holds the read and read-write *pointers* that we find. These maps hold
- // unique values for pointers (so no need for multi-map).
- AliasMap Reads;
- AliasMap ReadWrites;
+ AccessAnalysis::DepCandidates DependentAccesses;
+ AccessAnalysis Accesses(DL, DependentAccesses);
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
@@ -2694,10 +3924,12 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return false;
}
- // If we did *not* see this pointer before, insert it to
- // the read-write list. At this phase it is only a 'write' list.
- if (Seen.insert(Ptr))
- ReadWrites.insert(std::make_pair(Ptr, ST));
+ // If we did *not* see this pointer before, insert it to the read-write
+ // list. At this phase it is only a 'write' list.
+ if (Seen.insert(Ptr)) {
+ ++NumReadWrites;
+ Accesses.addStore(Ptr);
+ }
}
if (IsAnnotatedParallel) {
@@ -2718,51 +3950,44 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// If the address of i is unknown (for example A[B[i]]) then we may
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
- if (Seen.insert(Ptr) || 0 == isConsecutivePtr(Ptr))
- Reads.insert(std::make_pair(Ptr, LD));
+ bool IsReadOnlyPtr = false;
+ if (Seen.insert(Ptr) || !isStridedPtr(SE, DL, Ptr, TheLoop)) {
+ ++NumReads;
+ IsReadOnlyPtr = true;
+ }
+ Accesses.addLoad(Ptr, IsReadOnlyPtr);
}
// If we write (or read-write) to a single destination and there are no
// other reads in this loop then it is safe to vectorize.
- if (ReadWrites.size() == 1 && Reads.size() == 0) {
+ if (NumReadWrites == 1 && NumReads == 0) {
DEBUG(dbgs() << "LV: Found a write-only loop!\n");
return true;
}
- unsigned NumReadPtrs = 0;
- unsigned NumWritePtrs = 0;
+ // Build dependence sets and check whether we need a runtime pointer bounds
+ // check.
+ Accesses.buildDependenceSets();
+ bool NeedRTCheck = Accesses.isRTCheckNeeded();
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
- bool CanDoRT = true;
- AliasMap::iterator MI, ME;
- for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
- Value *V = (*MI).first;
- if (hasComputableBounds(V)) {
- PtrRtCheck.insert(SE, TheLoop, V, true);
- NumWritePtrs++;
- DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
- } else {
- CanDoRT = false;
- break;
- }
- }
- for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
- Value *V = (*MI).first;
- if (hasComputableBounds(V)) {
- PtrRtCheck.insert(SE, TheLoop, V, false);
- NumReadPtrs++;
- DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
- } else {
- CanDoRT = false;
- break;
- }
- }
+ unsigned NumComparisons = 0;
+ bool CanDoRT = false;
+ if (NeedRTCheck)
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop);
+
+ DEBUG(dbgs() << "LV: We need to do " << NumComparisons <<
+ " pointer comparisons.\n");
- // Check that we did not collect too many pointers or found a
- // unsizeable pointer.
- unsigned NumComparisons = (NumWritePtrs * (NumReadPtrs + NumWritePtrs - 1));
- DEBUG(dbgs() << "LV: We need to compare " << NumComparisons << " ptrs.\n");
+ // If all pointers fall into a single dependence set we don't need a
+ // runtime check.
+ if (NumComparisons == 0 && NeedRTCheck)
+ NeedRTCheck = false;
+
+ // Check that we did not collect too many pointers or found an unsizeable
+ // pointer.
if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
PtrRtCheck.reset();
CanDoRT = false;
@@ -2772,122 +3997,69 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
}
- bool NeedRTCheck = false;
-
- // Biggest vectorized access possible, vector width * unroll factor.
- // TODO: We're being very pessimistic here, find a way to know the
- // real access width before getting here.
- unsigned MaxByteWidth = (TTI->getRegisterBitWidth(true) / 8) *
- TTI->getMaximumUnrollFactor();
- // Now that the pointers are in two lists (Reads and ReadWrites), we
- // can check that there are no conflicts between each of the writes and
- // between the writes to the reads.
- // Note that WriteObjects duplicates the stores (indexed now by underlying
- // objects) to avoid pointing to elements inside ReadWrites.
- // TODO: Maybe create a new type where they can interact without duplication.
- AliasMultiMap WriteObjects;
- ValueVector TempObjects;
-
- // Check that the read-writes do not conflict with other read-write
- // pointers.
- bool AllWritesIdentified = true;
- for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
- Value *Val = (*MI).first;
- Instruction *Inst = (*MI).second;
-
- GetUnderlyingObjects(Val, TempObjects, DL);
- for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
- UI != UE; ++UI) {
- if (!isIdentifiedObject(*UI)) {
- DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **UI <<"\n");
- NeedRTCheck = true;
- AllWritesIdentified = false;
- }
+ if (NeedRTCheck && !CanDoRT) {
+ DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
+ "the array bounds.\n");
+ PtrRtCheck.reset();
+ return false;
+ }
- // Never seen it before, can't alias.
- if (WriteObjects[*UI].empty()) {
- DEBUG(dbgs() << "LV: Adding Underlying value:" << **UI <<"\n");
- WriteObjects[*UI].push_back(Inst);
- continue;
- }
- // Direct alias found.
- if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
- DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
- << **UI <<"\n");
- return false;
- }
- DEBUG(dbgs() << "LV: Found a conflicting global value:"
- << **UI <<"\n");
- DEBUG(dbgs() << "LV: While examining store:" << *Inst <<"\n");
- DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
-
- // If global alias, make sure they do alias.
- if (hasPossibleGlobalWriteReorder(*UI,
- Inst,
- WriteObjects,
- MaxByteWidth)) {
- DEBUG(dbgs() << "LV: Found a possible write-write reorder:" << **UI
- << "\n");
+ PtrRtCheck.Need = NeedRTCheck;
+
+ bool CanVecMem = true;
+ if (Accesses.isDependencyCheckNeeded()) {
+ DEBUG(dbgs() << "LV: Checking memory dependencies\n");
+ CanVecMem = DepChecker.areDepsSafe(DependentAccesses,
+ Accesses.getDependenciesToCheck());
+ MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
+
+ if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
+ DEBUG(dbgs() << "LV: Retrying with memory checks\n");
+ NeedRTCheck = true;
+
+ // Clear the dependency checks. We assume they are not needed.
+ Accesses.resetDepChecks();
+
+ PtrRtCheck.reset();
+ PtrRtCheck.Need = true;
+
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
+ TheLoop, true);
+ // Check that we did not collect too many pointers or found an unsizeable
+ // pointer.
+ if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
+ DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
+ PtrRtCheck.reset();
return false;
}
- // Didn't alias, insert into map for further reference.
- WriteObjects[*UI].push_back(Inst);
+ CanVecMem = true;
}
- TempObjects.clear();
}
- /// Check that the reads don't conflict with the read-writes.
- for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
- Value *Val = (*MI).first;
- GetUnderlyingObjects(Val, TempObjects, DL);
- for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
- UI != UE; ++UI) {
- // If all of the writes are identified then we don't care if the read
- // pointer is identified or not.
- if (!AllWritesIdentified && !isIdentifiedObject(*UI)) {
- DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **UI <<"\n");
- NeedRTCheck = true;
- }
+ DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
+ " need a runtime memory check.\n");
- // Never seen it before, can't alias.
- if (WriteObjects[*UI].empty())
- continue;
- // Direct alias found.
- if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
- DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
- << **UI <<"\n");
- return false;
- }
- DEBUG(dbgs() << "LV: Found a global value: "
- << **UI <<"\n");
- Instruction *Inst = (*MI).second;
- DEBUG(dbgs() << "LV: While examining load:" << *Inst <<"\n");
- DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
-
- // If global alias, make sure they do alias.
- if (hasPossibleGlobalWriteReorder(*UI,
- Inst,
- WriteObjects,
- MaxByteWidth)) {
- DEBUG(dbgs() << "LV: Found a possible read-write reorder:" << **UI
- << "\n");
- return false;
- }
- }
- TempObjects.clear();
- }
+ return CanVecMem;
+}
- PtrRtCheck.Need = NeedRTCheck;
- if (NeedRTCheck && !CanDoRT) {
- DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
- "the array bounds.\n");
- PtrRtCheck.reset();
- return false;
+static bool hasMultipleUsesOf(Instruction *I,
+ SmallPtrSet<Instruction *, 8> &Insts) {
+ unsigned NumUses = 0;
+ for(User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) {
+ if (Insts.count(dyn_cast<Instruction>(*Use)))
+ ++NumUses;
+ if (NumUses > 1)
+ return true;
}
- DEBUG(dbgs() << "LV: We "<< (NeedRTCheck ? "" : "don't") <<
- " need a runtime memory check.\n");
+ return false;
+}
+
+static bool areAllUsesIn(Instruction *I, SmallPtrSet<Instruction *, 8> &Set) {
+ for(User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
+ if (!Set.count(dyn_cast<Instruction>(*Use)))
+ return false;
return true;
}
@@ -2909,116 +4081,154 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// This includes users of the reduction, variables (which form a cycle
// which ends in the phi node).
Instruction *ExitInstruction = 0;
- // Indicates that we found a binary operation in our scan.
- bool FoundBinOp = false;
+ // Indicates that we found a reduction operation in our scan.
+ bool FoundReduxOp = false;
- // Iter is our iterator. We start with the PHI node and scan for all of the
- // users of this instruction. All users must be instructions that can be
- // used as reduction variables (such as ADD). We may have a single
- // out-of-block user. The cycle must end with the original PHI.
- Instruction *Iter = Phi;
+ // We start with the PHI node and scan for all of the users of this
+ // instruction. All users must be instructions that can be used as reduction
+ // variables (such as ADD). We must have a single out-of-block user. The cycle
+ // must include the original PHI.
+ bool FoundStartPHI = false;
// To recognize min/max patterns formed by a icmp select sequence, we store
// the number of instruction we saw from the recognized min/max pattern,
- // such that we don't stop when we see the phi has two uses (one by the select
- // and one by the icmp) and to make sure we only see exactly the two
- // instructions.
+ // to make sure we only see exactly the two instructions.
unsigned NumCmpSelectPatternInst = 0;
ReductionInstDesc ReduxDesc(false, 0);
- // Avoid cycles in the chain.
SmallPtrSet<Instruction *, 8> VisitedInsts;
- while (VisitedInsts.insert(Iter)) {
- // If the instruction has no users then this is a broken
- // chain and can't be a reduction variable.
- if (Iter->use_empty())
+ SmallVector<Instruction *, 8> Worklist;
+ Worklist.push_back(Phi);
+ VisitedInsts.insert(Phi);
+
+ // A value in the reduction can be used:
+ // - By the reduction:
+ // - Reduction operation:
+ // - One use of the reduction value (safe).
+ // - Multiple uses of the reduction value (not safe).
+ // - PHI:
+ // - All uses of the PHI must be the reduction (safe).
+ // - Otherwise, not safe.
+ // - By one instruction outside of the loop (safe).
+ // - By further instructions outside of the loop (not safe).
+ // - By an instruction that is not part of the reduction (not safe).
+ // This is either:
+ // * An instruction type other than PHI or the reduction operation.
+ // * A PHI in the header other than the initial PHI.
+ while (!Worklist.empty()) {
+ Instruction *Cur = Worklist.back();
+ Worklist.pop_back();
+
+ // No Users.
+ // If the instruction has no users then this is a broken chain and can't be
+ // a reduction variable.
+ if (Cur->use_empty())
return false;
- // Did we find a user inside this loop already ?
- bool FoundInBlockUser = false;
- // Did we reach the initial PHI node already ?
- bool FoundStartPHI = false;
+ bool IsAPhi = isa<PHINode>(Cur);
- // Is this a bin op ?
- FoundBinOp |= !isa<PHINode>(Iter);
+ // A header PHI use other than the original PHI.
+ if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
+ return false;
- // For each of the *users* of iter.
- for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end();
- it != e; ++it) {
- Instruction *U = cast<Instruction>(*it);
- // We already know that the PHI is a user.
- if (U == Phi) {
- FoundStartPHI = true;
- continue;
- }
+ // Reductions of instructions such as Div and Sub are only possible if the
+ // LHS is the reduction variable.
+ if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
+ !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
+ !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
+ return false;
+
+ // Any reduction instruction must be of one of the allowed kinds.
+ ReduxDesc = isReductionInstr(Cur, Kind, ReduxDesc);
+ if (!ReduxDesc.IsReduction)
+ return false;
+
+ // A reduction operation must only have one use of the reduction value.
+ if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
+ hasMultipleUsesOf(Cur, VisitedInsts))
+ return false;
+
+ // All inputs to a PHI node must be reduction values.
+ if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
+ return false;
+
+ if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(Cur) ||
+ isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+ if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) ||
+ isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+
+ // Check whether we found a reduction operator.
+ FoundReduxOp |= !IsAPhi;
+
+ // Process users of the current instruction. Push non-PHI nodes after PHI
+ // onto the stack. This way we are going to have seen all inputs to PHI
+ // nodes once we get to them.
+ SmallVector<Instruction *, 8> NonPHIs;
+ SmallVector<Instruction *, 8> PHIs;
+ for (Value::use_iterator UI = Cur->use_begin(), E = Cur->use_end(); UI != E;
+ ++UI) {
+ Instruction *Usr = cast<Instruction>(*UI);
// Check if we found the exit user.
- BasicBlock *Parent = U->getParent();
+ BasicBlock *Parent = Usr->getParent();
if (!TheLoop->contains(Parent)) {
- // Exit if you find multiple outside users.
- if (ExitInstruction != 0)
+ // Exit if you find multiple outside users or if the header phi node is
+ // being used. In this case the user uses the value of the previous
+ // iteration, in which case we would lose "VF-1" iterations of the
+ // reduction operation if we vectorize.
+ if (ExitInstruction != 0 || Cur == Phi)
return false;
- ExitInstruction = Iter;
- }
- // We allow in-loop PHINodes which are not the original reduction PHI
- // node. If this PHI is the only user of Iter (happens in IF w/ no ELSE
- // structure) then don't skip this PHI.
- if (isa<PHINode>(Iter) && isa<PHINode>(U) &&
- U->getParent() != TheLoop->getHeader() &&
- TheLoop->contains(U) &&
- Iter->hasNUsesOrMore(2))
- continue;
+ // The instruction used by an outside user must be the last instruction
+ // before we feed back to the reduction phi. Otherwise, we lose VF-1
+ // operations on the value.
+ if (std::find(Phi->op_begin(), Phi->op_end(), Cur) == Phi->op_end())
+ return false;
- // We can't have multiple inside users except for a combination of
- // icmp/select both using the phi.
- if (FoundInBlockUser && !NumCmpSelectPatternInst)
- return false;
- FoundInBlockUser = true;
-
- // Any reduction instr must be of one of the allowed kinds.
- ReduxDesc = isReductionInstr(U, Kind, ReduxDesc);
- if (!ReduxDesc.IsReduction)
- return false;
+ ExitInstruction = Cur;
+ continue;
+ }
- if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) || isa<SelectInst>(U)))
- ++NumCmpSelectPatternInst;
- if (Kind == RK_FloatMinMax && (isa<FCmpInst>(U) || isa<SelectInst>(U)))
- ++NumCmpSelectPatternInst;
+ // Process instructions only once (termination).
+ if (VisitedInsts.insert(Usr)) {
+ if (isa<PHINode>(Usr))
+ PHIs.push_back(Usr);
+ else
+ NonPHIs.push_back(Usr);
+ }
+ // Remember that we completed the cycle.
+ if (Usr == Phi)
+ FoundStartPHI = true;
+ }
+ Worklist.append(PHIs.begin(), PHIs.end());
+ Worklist.append(NonPHIs.begin(), NonPHIs.end());
+ }
- // Reductions of instructions such as Div, and Sub is only
- // possible if the LHS is the reduction variable.
- if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) &&
- !isa<ICmpInst>(U) && !isa<FCmpInst>(U) && U->getOperand(0) != Iter)
- return false;
+ // This means we have seen one but not the other instruction of the
+ // pattern or more than just a select and cmp.
+ if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
+ NumCmpSelectPatternInst != 2)
+ return false;
- Iter = ReduxDesc.PatternLastInst;
- }
+ if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
+ return false;
- // This means we have seen one but not the other instruction of the
- // pattern or more than just a select and cmp.
- if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
- NumCmpSelectPatternInst != 2)
- return false;
+ // We found a reduction var if we have reached the original phi node and we
+ // only have a single instruction with out-of-loop users.
- // We found a reduction var if we have reached the original
- // phi node and we only have a single instruction with out-of-loop
- // users.
- if (FoundStartPHI) {
- // This instruction is allowed to have out-of-loop users.
- AllowedExit.insert(ExitInstruction);
+ // This instruction is allowed to have out-of-loop users.
+ AllowedExit.insert(ExitInstruction);
- // Save the description of this reduction variable.
- ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
- ReduxDesc.MinMaxKind);
- Reductions[Phi] = RD;
- // We've ended the cycle. This is a reduction variable if we have an
- // outside user and it has a binary op.
- return FoundBinOp && ExitInstruction;
- }
- }
+ // Save the description of this reduction variable.
+ ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
+ ReduxDesc.MinMaxKind);
+ Reductions[Phi] = RD;
+ // We've ended the cycle. This is a reduction variable if we have an
+ // outside user and it has a binary op.
- return false;
+ return true;
}
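As source-level intuition, a shape this walk accepts next to one it rejects (illustrative C, not part of the patch):

    static int redux_ok(const int *a, int n) {
      int sum = 0;
      for (int i = 0; i < n; ++i)
        sum += a[i];      // phi -> add -> phi cycle
      return sum;         // the single allowed out-of-loop user (of the add)
    }

    static int redux_bad(const int *a, int n) {
      int sum = 0, prev = 0;
      for (int i = 0; i < n; ++i) {
        prev = sum;       // reads the header phi's previous-iteration value
        sum += a[i];
      }
      return prev;        // outside user of the phi itself: rejected above
    }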
/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
@@ -3169,12 +4379,28 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
return !DT->dominates(BB, Latch);
}
-bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
+bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
+ SmallPtrSet<Value *, 8>& SafePtrs) {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- // We don't predicate loads/stores at the moment.
- if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
+ // We might be able to hoist the load.
+ if (it->mayReadFromMemory()) {
+ LoadInst *LI = dyn_cast<LoadInst>(it);
+ if (!LI || !SafePtrs.count(LI->getPointerOperand()))
+ return false;
+ }
+
+ // We don't predicate stores at the moment.
+ if (it->mayWriteToMemory() || it->mayThrow())
return false;
+ // Check that we don't have a constant expression that can trap as an operand.
+ for (Instruction::op_iterator OI = it->op_begin(), OE = it->op_end();
+ OI != OE; ++OI) {
+ if (Constant *C = dyn_cast<Constant>(*OI))
+ if (C->canTrap())
+ return false;
+ }
+
// The instructions below can trap.
switch (it->getOpcode()) {
default: continue;
@@ -3189,15 +4415,6 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
return true;
}
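In source terms, the kind of block the new SafePtrs parameter admits (illustrative; it assumes the address was collected into SafePtrs because it is also accessed unconditionally in the loop):

    for (int i = 0; i < n; ++i) {
      int v = a[i];        // unconditional access makes a[i] a safe pointer
      if (v > 0)
        v += a[i];         // predicated re-load of the same safe address: OK
      b[i] = v;            // stores under a condition would still bail out
    }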
-bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
- const SCEV *PhiScev = SE->getSCEV(Ptr);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
- if (!AR)
- return false;
-
- return AR->isAffine();
-}
-
LoopVectorizationCostModel::VectorizationFactor
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
unsigned UserVF) {
@@ -3210,13 +4427,19 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
// Find the trip count.
unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
- DEBUG(dbgs() << "LV: Found trip count:"<<TC<<"\n");
+ DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
unsigned WidestType = getWidestType();
unsigned WidestRegister = TTI.getRegisterBitWidth(true);
+ unsigned MaxSafeDepDist = -1U;
+ if (Legal->getMaxSafeDepDistBytes() != -1U)
+ MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
+ WidestRegister = ((WidestRegister < MaxSafeDepDist) ?
+ WidestRegister : MaxSafeDepDist);
unsigned MaxVectorSize = WidestRegister / WidestType;
DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n");
- DEBUG(dbgs() << "LV: The Widest register is:" << WidestRegister << "bits.\n");
+ DEBUG(dbgs() << "LV: The Widest register is: "
+ << WidestRegister << " bits.\n");
if (MaxVectorSize == 0) {
DEBUG(dbgs() << "LV: The target has no vector registers.\n");
@@ -3252,7 +4475,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
if (UserVF != 0) {
assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
- DEBUG(dbgs() << "LV: Using user VF "<<UserVF<<".\n");
+ DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
Factor.Width = UserVF;
return Factor;
@@ -3260,13 +4483,13 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
float Cost = expectedCost(1);
unsigned Width = 1;
- DEBUG(dbgs() << "LV: Scalar loop costs: "<< (int)Cost << ".\n");
+ DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)Cost << ".\n");
for (unsigned i=2; i <= VF; i*=2) {
// Notice that the vector loop needs to be executed less times, so
// we need to divide the cost of the vector loops by the width of
// the vector elements.
float VectorCost = expectedCost(i) / (float)i;
- DEBUG(dbgs() << "LV: Vector loop of width "<< i << " costs: " <<
+ DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " <<
(int)VectorCost << ".\n");
if (VectorCost < Cost) {
Cost = VectorCost;
@@ -3347,6 +4570,10 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
if (OptForSize)
return 1;
+ // The maximum safe dependence distance already limited the VF; don't unroll.
+ if (Legal->getMaxSafeDepDistBytes() != -1U)
+ return 1;
+
// Do not unroll loops with a relatively small trip count.
unsigned TC = SE->getSmallConstantTripCount(TheLoop,
TheLoop->getLoopLatch());
@@ -3386,8 +4613,20 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
else if (UF < 1)
UF = 1;
- if (Legal->getReductionVars()->size()) {
- DEBUG(dbgs() << "LV: Unrolling because of reductions. \n");
+ bool HasReductions = Legal->getReductionVars()->size();
+
+ // Decide whether we want to unroll when we have decided that it is legal to
+ // vectorize but not profitable.
+ if (VF == 1) {
+ if (TheLoop->getNumBlocks() > 1 || !HasReductions ||
+ LoopCost > SmallLoopCost)
+ return 1;
+
+ return UF;
+ }
+
+ if (HasReductions) {
+ DEBUG(dbgs() << "LV: Unrolling because of reductions.\n");
return UF;
}
@@ -3395,14 +4634,14 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
// We assume that the cost overhead is 1 and we use the cost model
// to estimate the cost of the loop and unroll until the cost of the
// loop overhead is about 5% of the cost of the loop.
- DEBUG(dbgs() << "LV: Loop cost is "<< LoopCost <<" \n");
- if (LoopCost < 20) {
- DEBUG(dbgs() << "LV: Unrolling to reduce branch cost. \n");
- unsigned NewUF = 20/LoopCost + 1;
+ DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n');
+ if (LoopCost < SmallLoopCost) {
+ DEBUG(dbgs() << "LV: Unrolling to reduce branch cost.\n");
+ unsigned NewUF = SmallLoopCost / (LoopCost + 1);
return std::min(NewUF, UF);
}
- DEBUG(dbgs() << "LV: Not Unrolling. \n");
+ DEBUG(dbgs() << "LV: Not Unrolling.\n");
return 1;
}
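Worked numbers for the small-loop path (assuming SmallLoopCost keeps the old literal threshold of 20, now hoisted into a named constant elsewhere in this file):

    // VF > 1, LoopCost = 4:  NewUF = 20 / (4 + 1) = 4, then min(NewUF, UF)
    //                        (the removed code computed 20/4 + 1 = 6)
    // VF == 1: unroll only single-block reduction loops with LoopCost <= 20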
@@ -3503,16 +4742,16 @@ LoopVectorizationCostModel::calculateRegisterUsage() {
MaxUsage = std::max(MaxUsage, OpenIntervals.size());
DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " <<
- OpenIntervals.size() <<"\n");
+ OpenIntervals.size() << '\n');
// Add the current instruction to the list of open intervals.
OpenIntervals.insert(I);
}
unsigned Invariant = LoopInvariants.size();
- DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << " \n");
- DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << " \n");
- DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << " \n");
+ DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << '\n');
+ DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n');
+ DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << '\n');
R.LoopInvariantRegs = Invariant;
R.MaxLocalUsers = MaxUsage;
@@ -3535,15 +4774,15 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
continue;
unsigned C = getInstructionCost(it, VF);
- Cost += C;
- DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " <<
- VF << " For instruction: "<< *it << "\n");
+ BlockCost += C;
+ DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF " <<
+ VF << " For instruction: " << *it << '\n');
}
// We assume that if-converted blocks have a 50% chance of being executed.
// When the code is scalar then some of the blocks are avoided due to CF.
// When the code is vectorized we execute all code paths.
- if (Legal->blockNeedsPredication(*bb) && VF == 1)
+ if (VF == 1 && Legal->blockNeedsPredication(*bb))
BlockCost /= 2;
Cost += BlockCost;
@@ -3552,6 +4791,59 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
return Cost;
}
+/// \brief Check whether the address computation for a non-consecutive memory
+/// access looks like an unlikely candidate for being merged into the indexing
+/// mode.
+///
+/// We look for a GEP which has one index that is an induction variable and all
+/// other indices are loop invariant. If the stride of this access is also
+/// within a small bound we decide that this address computation can likely be
+/// merged into the addressing mode.
+/// In all other cases, we identify the address computation as complex.
+static bool isLikelyComplexAddressComputation(Value *Ptr,
+ LoopVectorizationLegality *Legal,
+ ScalarEvolution *SE,
+ const Loop *TheLoop) {
+ GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!Gep)
+ return true;
+
+ // We are looking for a gep with all loop invariant indices except for one
+ // which should be an induction variable.
+ unsigned NumOperands = Gep->getNumOperands();
+ for (unsigned i = 1; i < NumOperands; ++i) {
+ Value *Opd = Gep->getOperand(i);
+ if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
+ !Legal->isInductionVariable(Opd))
+ return true;
+ }
+
+ // Now we know we have a GEP ptr, %inv, %ind, %inv. Make sure that the step
+ // can likely be merged into the address computation.
+ unsigned MaxMergeDistance = 64;
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
+ if (!AddRec)
+ return true;
+
+ // Check the step is constant.
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ // Calculate the pointer stride and check if it is consecutive.
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C)
+ return true;
+
+ const APInt &APStepVal = C->getValue()->getValue();
+
+ // Huge step value - give up.
+ if (APStepVal.getBitWidth() > 64)
+ return true;
+
+ int64_t StepVal = APStepVal.getSExtValue();
+
+ return StepVal > MaxMergeDistance;
+}
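Illustrative C for the two outcomes (function and variable names made up; the cheap case assumes the induction phi feeds the GEP index directly):

    long cheap(const int (*M)[8], int k, long n) {
      long s = 0;
      for (long i = 0; i < n; ++i)
        s += M[i][k];      // one induction index, 32-byte step: likely merged
      return s;
    }

    long complex1(const int *A, const int *B, int n) {
      long s = 0;
      for (int i = 0; i < n; ++i)
        s += A[B[i]];      // index neither invariant nor an induction: complex
      return s;
    }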
+
unsigned
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// If we know that this instruction will remain uniform, check the cost of
@@ -3647,6 +4939,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
+ bool IsComplexComputation =
+ isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
unsigned Cost = 0;
// The cost of extracting from the value vector and pointer vector.
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
@@ -3662,7 +4956,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
}
// The cost of the scalar loads/stores.
- Cost += VF * TTI.getAddressComputationCost(ValTy->getScalarType());
+ Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
Alignment, AS);
return Cost;
@@ -3743,15 +5037,17 @@ Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
char LoopVectorize::ID = 0;
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
- Pass *createLoopVectorizePass() {
- return new LoopVectorize();
+ Pass *createLoopVectorizePass(bool NoUnrolling) {
+ return new LoopVectorize(NoUnrolling);
}
}
@@ -3766,3 +5062,96 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
return false;
}
+
+void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) {
+ assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
+ // Holds vector parameters or scalars, in case of uniform vals.
+ SmallVector<VectorParts, 4> Params;
+
+ setDebugLocFromInst(Builder, Instr);
+
+ // Find all of the vectorized parameters.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *SrcOp = Instr->getOperand(op);
+
+ // If we are accessing the old induction variable, use the new one.
+ if (SrcOp == OldInduction) {
+ Params.push_back(getVectorValue(SrcOp));
+ continue;
+ }
+
+ // Try using previously calculated values.
+ Instruction *SrcInst = dyn_cast<Instruction>(SrcOp);
+
+ // If the src is an instruction that appeared earlier in the basic block
+ // then it should already be vectorized.
+ if (SrcInst && OrigLoop->contains(SrcInst)) {
+ assert(WidenMap.has(SrcInst) && "Source operand is unavailable");
+ // The parameter is a vector value from earlier.
+ Params.push_back(WidenMap.get(SrcInst));
+ } else {
+ // The parameter is a scalar from outside the loop. Maybe even a constant.
+ VectorParts Scalars;
+ Scalars.append(UF, SrcOp);
+ Params.push_back(Scalars);
+ }
+ }
+
+ assert(Params.size() == Instr->getNumOperands() &&
+ "Invalid number of operands");
+
+ // Does this instruction return a value?
+ bool IsVoidRetTy = Instr->getType()->isVoidTy();
+
+ Value *UndefVec = IsVoidRetTy ? 0 :
+ UndefValue::get(Instr->getType());
+ // Create a new entry in the WidenMap and initialize it to Undef or Null.
+ VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
+
+ // For each vector unroll 'part':
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ // For each scalar that we create:
+
+ Instruction *Cloned = Instr->clone();
+ if (!IsVoidRetTy)
+ Cloned->setName(Instr->getName() + ".cloned");
+ // Replace the operands of the cloned instructions with extracted scalars.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *Op = Params[op][Part];
+ Cloned->setOperand(op, Op);
+ }
+
+ // Place the cloned scalar in the new loop.
+ Builder.Insert(Cloned);
+
+ // If the original scalar returns a value we need to place it in a vector
+ // so that future users will be able to use it.
+ if (!IsVoidRetTy)
+ VecResults[Part] = Cloned;
+ }
+}
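
A minimal standalone analogue of the Part loop above, with integers standing in for cloned instructions; Params[op][Part] holds operand op for unrolled copy Part (all values invented):

    #include <cassert>
    #include <vector>

    int main() {
      const unsigned UF = 2; // unroll factor
      // One value per operand and per unrolled part.
      std::vector<std::vector<int>> Params = {{1, 2}, {10, 20}};
      std::vector<int> VecResults(UF);
      for (unsigned Part = 0; Part < UF; ++Part)
        VecResults[Part] = Params[0][Part] + Params[1][Part]; // the "clone"
      assert(VecResults[0] == 11 && VecResults[1] == 22);
      return 0;
    }
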
+
+void
+InnerLoopUnroller::vectorizeMemoryInstruction(Instruction *Instr,
+ LoopVectorizationLegality*) {
+ return scalarizeInstruction(Instr);
+}
+
+Value *InnerLoopUnroller::reverseVector(Value *Vec) {
+ return Vec;
+}
+
+Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) {
+ return V;
+}
+
+Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx,
+ bool Negate) {
+ // When unrolling and the VF is 1, we only need to add a simple scalar.
+ Type *ITy = Val->getType();
+ assert(!ITy->isVectorTy() && "Val must be a scalar");
+ Constant *C = ConstantInt::get(ITy, StartIdx, Negate);
+ return Builder.CreateAdd(Val, C, "induction");
+}
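
Since VF is 1 here, the "consecutive vector" degenerates into a scalar add of the start index; a toy version of that arithmetic (the Negate flag is left out for brevity):

    #include <cassert>
    #include <cstdint>

    static int64_t consecutiveScalar(int64_t Val, int64_t StartIdx) {
      return Val + StartIdx; // the "induction" value for one unrolled part
    }

    int main() {
      assert(consecutiveScalar(10, 0) == 10); // part 0
      assert(consecutiveScalar(10, 1) == 11); // part 1
      return 0;
    }
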
+
diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cc30cc9..c72b51f 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16,18 +16,23 @@
//
//===----------------------------------------------------------------------===//
#define SV_NAME "slp-vectorizer"
-#define DEBUG_TYPE SV_NAME
+#define DEBUG_TYPE "SLP"
-#include "VecUtils.h"
#include "llvm/Transforms/Vectorize.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
@@ -35,19 +40,1717 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
#include <map>
using namespace llvm;
static cl::opt<int>
-SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
- cl::desc("Only vectorize trees if the gain is above this "
- "number. (gain = -cost of vectorization)"));
+ SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
+ cl::desc("Only vectorize if you gain more than this "
+ "number "));
+
+static cl::opt<bool>
+ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden,
+ cl::desc("Attempt to vectorize horizontal reductions"));
+
+static cl::opt<bool> ShouldStartVectorizeHorAtStore(
+ "slp-vectorize-hor-store", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Attempt to vectorize horizontal reductions feeding into a store"));
+
namespace {
+static const unsigned MinVecRegSize = 128;
+
+static const unsigned RecursionMaxDepth = 12;
+
+/// A helper class for numbering instructions in multiple blocks.
+/// Numbers start at zero for each basic block.
+struct BlockNumbering {
+
+ BlockNumbering(BasicBlock *Bb) : BB(Bb), Valid(false) {}
+
+ BlockNumbering() : BB(0), Valid(false) {}
+
+ void numberInstructions() {
+ unsigned Loc = 0;
+ InstrIdx.clear();
+ InstrVec.clear();
+ // Number the instructions in the block.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ InstrIdx[it] = Loc++;
+ InstrVec.push_back(it);
+ assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
+ }
+ Valid = true;
+ }
+
+ int getIndex(Instruction *I) {
+ assert(I->getParent() == BB && "Invalid instruction");
+ if (!Valid)
+ numberInstructions();
+ assert(InstrIdx.count(I) && "Unknown instruction");
+ return InstrIdx[I];
+ }
+
+ Instruction *getInstruction(unsigned loc) {
+ if (!Valid)
+ numberInstructions();
+ assert(InstrVec.size() > loc && "Invalid Index");
+ return InstrVec[loc];
+ }
+
+ void forget() { Valid = false; }
+
+private:
+ /// The block we are numbering.
+ BasicBlock *BB;
+ /// Is the block numbered.
+ bool Valid;
+ /// Maps instructions to numbers and back.
+ SmallDenseMap<Instruction *, int> InstrIdx;
+ /// Maps integers to Instructions.
+ SmallVector<Instruction *, 32> InstrVec;
+};
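
The same lazy-numbering idea in a standalone form, with strings standing in for instructions (types and names are illustrative only):

    #include <cassert>
    #include <string>
    #include <unordered_map>
    #include <vector>

    struct Numbering {
      std::vector<std::string> Items; // stands in for the basic block
      std::unordered_map<std::string, int> Idx;
      bool Valid = false;
      void number() {
        Idx.clear();
        for (int i = 0, e = (int)Items.size(); i != e; ++i)
          Idx[Items[i]] = i;
        Valid = true;
      }
      int getIndex(const std::string &I) {
        if (!Valid)
          number(); // computed lazily, exactly like BlockNumbering
        return Idx.at(I);
      }
      void forget() { Valid = false; } // invalidate after the block changes
    };

    int main() {
      Numbering N;
      N.Items = {"load", "add", "store"};
      assert(N.getIndex("add") == 1);
      return 0;
    }
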
+
+/// \returns the parent basic block if all of the instructions in \p VL
+/// are in the same block or null otherwise.
+static BasicBlock *getSameBlock(ArrayRef<Value *> VL) {
+ Instruction *I0 = dyn_cast<Instruction>(VL[0]);
+ if (!I0)
+ return 0;
+ BasicBlock *BB = I0->getParent();
+ for (int i = 1, e = VL.size(); i < e; i++) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ if (!I)
+ return 0;
+
+ if (BB != I->getParent())
+ return 0;
+ }
+ return BB;
+}
+
+/// \returns True if all of the values in \p VL are constants.
+static bool allConstant(ArrayRef<Value *> VL) {
+ for (unsigned i = 0, e = VL.size(); i < e; ++i)
+ if (!isa<Constant>(VL[i]))
+ return false;
+ return true;
+}
+
+/// \returns True if all of the values in \p VL are identical.
+static bool isSplat(ArrayRef<Value *> VL) {
+ for (unsigned i = 1, e = VL.size(); i < e; ++i)
+ if (VL[i] != VL[0])
+ return false;
+ return true;
+}
+
+/// \returns The opcode if all of the Instructions in \p VL have the same
+/// opcode, or zero.
+static unsigned getSameOpcode(ArrayRef<Value *> VL) {
+ Instruction *I0 = dyn_cast<Instruction>(VL[0]);
+ if (!I0)
+ return 0;
+ unsigned Opcode = I0->getOpcode();
+ for (int i = 1, e = VL.size(); i < e; i++) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ if (!I || Opcode != I->getOpcode())
+ return 0;
+ }
+ return Opcode;
+}
+
+/// \returns \p I after propagating metadata from \p VL.
+static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
+ Instruction *I0 = cast<Instruction>(VL[0]);
+ SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
+ I0->getAllMetadataOtherThanDebugLoc(Metadata);
+
+ for (unsigned i = 0, n = Metadata.size(); i != n; ++i) {
+ unsigned Kind = Metadata[i].first;
+ MDNode *MD = Metadata[i].second;
+
+ for (int i = 1, e = VL.size(); MD && i != e; i++) {
+ Instruction *I = cast<Instruction>(VL[i]);
+ MDNode *IMD = I->getMetadata(Kind);
+
+ switch (Kind) {
+ default:
+ MD = 0; // Remove unknown metadata
+ break;
+ case LLVMContext::MD_tbaa:
+ MD = MDNode::getMostGenericTBAA(MD, IMD);
+ break;
+ case LLVMContext::MD_fpmath:
+ MD = MDNode::getMostGenericFPMath(MD, IMD);
+ break;
+ }
+ }
+ I->setMetadata(Kind, MD);
+ }
+ return I;
+}
+
+/// \returns The type that all of the values in \p VL have or null if there
+/// are different types.
+static Type* getSameType(ArrayRef<Value *> VL) {
+ Type *Ty = VL[0]->getType();
+ for (int i = 1, e = VL.size(); i < e; i++)
+ if (VL[i]->getType() != Ty)
+ return 0;
+
+ return Ty;
+}
+
+/// \returns True if the ExtractElement instructions in VL can be vectorized
+/// to use the original vector.
+static bool CanReuseExtract(ArrayRef<Value *> VL) {
+ assert(Instruction::ExtractElement == getSameOpcode(VL) && "Invalid opcode");
+ // Check if all of the extracts come from the same vector and from the
+ // correct offset.
+ Value *VL0 = VL[0];
+ ExtractElementInst *E0 = cast<ExtractElementInst>(VL0);
+ Value *Vec = E0->getOperand(0);
+
+ // We have to extract from the same vector type.
+ unsigned NElts = Vec->getType()->getVectorNumElements();
+
+ if (NElts != VL.size())
+ return false;
+
+ // Check that all of the indices extract from the correct offset.
+ ConstantInt *CI = dyn_cast<ConstantInt>(E0->getOperand(1));
+ if (!CI || CI->getZExtValue())
+ return false;
+
+ for (unsigned i = 1, e = VL.size(); i < e; ++i) {
+ ExtractElementInst *E = cast<ExtractElementInst>(VL[i]);
+ ConstantInt *CI = dyn_cast<ConstantInt>(E->getOperand(1));
+
+ if (!CI || CI->getZExtValue() != i || E->getOperand(0) != Vec)
+ return false;
+ }
+
+ return true;
+}
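
A standalone restatement of that test: every extract must read lane i of one and the same source vector, and the bundle must cover all lanes (the pairs below are invented (vector id, lane) tuples):

    #include <cassert>
    #include <utility>
    #include <vector>

    static bool canReuse(const std::vector<std::pair<int, int>> &Extracts,
                         unsigned NElts) {
      if (Extracts.size() != NElts) // must cover the whole vector
        return false;
      for (unsigned i = 0; i < Extracts.size(); ++i)
        if (Extracts[i].first != Extracts[0].first || // same source vector
            Extracts[i].second != (int)i)             // lane i in position i
          return false;
      return true;
    }

    int main() {
      assert(canReuse({{7, 0}, {7, 1}, {7, 2}, {7, 3}}, 4));
      assert(!canReuse({{7, 0}, {7, 2}, {7, 1}, {7, 3}}, 4)); // shuffled
      return 0;
    }
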
+
+static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right) {
+
+ SmallVector<Value *, 16> OrigLeft, OrigRight;
+
+ bool AllSameOpcodeLeft = true;
+ bool AllSameOpcodeRight = true;
+ for (unsigned i = 0, e = VL.size(); i != e; ++i) {
+ Instruction *I = cast<Instruction>(VL[i]);
+ Value *V0 = I->getOperand(0);
+ Value *V1 = I->getOperand(1);
+
+ OrigLeft.push_back(V0);
+ OrigRight.push_back(V1);
+
+ Instruction *I0 = dyn_cast<Instruction>(V0);
+ Instruction *I1 = dyn_cast<Instruction>(V1);
+
+ // Check whether all operands on one side have the same opcode. In this case
+ // we want to preserve the original order and not make things worse by
+ // reordering.
+ AllSameOpcodeLeft = I0;
+ AllSameOpcodeRight = I1;
+
+ if (i && AllSameOpcodeLeft) {
+ if(Instruction *P0 = dyn_cast<Instruction>(OrigLeft[i-1])) {
+ if(P0->getOpcode() != I0->getOpcode())
+ AllSameOpcodeLeft = false;
+ } else
+ AllSameOpcodeLeft = false;
+ }
+ if (i && AllSameOpcodeRight) {
+ if(Instruction *P1 = dyn_cast<Instruction>(OrigRight[i-1])) {
+ if(P1->getOpcode() != I1->getOpcode())
+ AllSameOpcodeRight = false;
+ } else
+ AllSameOpcodeRight = false;
+ }
+
+ // Sort two opcodes. In the code below we try to preserve the ability to use
+ // broadcast of values instead of individual inserts.
+ // vl1 = load
+ // vl2 = phi
+ // vr1 = load
+ // vr2 = vr1
+ // = vl1 x vr1
+ // = vl2 x vr2
+ // If we just sorted according to opcode we would leave the first line
+ // intact but we would swap vl2 with vr2 because opcode(phi) > opcode(load).
+ // = vl1 x vr1
+ // = vr2 x vl2
+ // Because vr2 and vr1 are from the same load we lose the opportunity for a
+ // broadcast for the packed right side in the backend: we have [vr1, vl2]
+ // instead of [vr1, vr2=vr1].
+ if (I0 && I1) {
+ if(!i && I0->getOpcode() > I1->getOpcode()) {
+ Left.push_back(I1);
+ Right.push_back(I0);
+ } else if (i && I0->getOpcode() > I1->getOpcode() && Right[i-1] != I1) {
+ // Try not to destroy a broadcast for no apparent benefit.
+ Left.push_back(I1);
+ Right.push_back(I0);
+ } else if (i && I0->getOpcode() == I1->getOpcode() && Right[i-1] == I0) {
+ // Try preserve broadcasts.
+ Left.push_back(I1);
+ Right.push_back(I0);
+ } else if (i && I0->getOpcode() == I1->getOpcode() && Left[i-1] == I1) {
+ // Try preserve broadcasts.
+ Left.push_back(I1);
+ Right.push_back(I0);
+ } else {
+ Left.push_back(I0);
+ Right.push_back(I1);
+ }
+ continue;
+ }
+ // Only one side is an instruction; put it on the right.
+ if (I0) {
+ Left.push_back(V1);
+ Right.push_back(I0);
+ continue;
+ }
+ Left.push_back(V0);
+ Right.push_back(V1);
+ }
+
+ bool LeftBroadcast = isSplat(Left);
+ bool RightBroadcast = isSplat(Right);
+
+ // Don't reorder if the operands were good to begin with.
+ if (!(LeftBroadcast || RightBroadcast) &&
+ (AllSameOpcodeRight || AllSameOpcodeLeft)) {
+ Left = OrigLeft;
+ Right = OrigRight;
+ }
+}
+
+/// Bottom Up SLP Vectorizer.
+class BoUpSLP {
+public:
+ typedef SmallVector<Value *, 8> ValueList;
+ typedef SmallVector<Instruction *, 16> InstrList;
+ typedef SmallPtrSet<Value *, 16> ValueSet;
+ typedef SmallVector<StoreInst *, 8> StoreList;
+
+ BoUpSLP(Function *Func, ScalarEvolution *Se, DataLayout *Dl,
+ TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li,
+ DominatorTree *Dt) :
+ F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt),
+ Builder(Se->getContext()) {
+ // Setup the block numbering utility for all of the blocks in the
+ // function.
+ for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) {
+ BasicBlock *BB = it;
+ BlocksNumbers[BB] = BlockNumbering(BB);
+ }
+ }
+
+ /// \brief Vectorize the tree that was built by buildTree.
+ /// Returns the vectorized root.
+ Value *vectorizeTree();
+
+ /// \returns the vectorization cost of the tree built by buildTree.
+ /// A negative number means that this is profitable.
+ int getTreeCost();
+
+ /// Construct a vectorizable tree that starts at \p Roots and is possibly
+ /// used by a reduction of \p RdxOps.
+ void buildTree(ArrayRef<Value *> Roots, ValueSet *RdxOps = 0);
+
+ /// Clear the internal data structures that are created by 'buildTree'.
+ void deleteTree() {
+ RdxOps = 0;
+ VectorizableTree.clear();
+ ScalarToTreeEntry.clear();
+ MustGather.clear();
+ ExternalUses.clear();
+ MemBarrierIgnoreList.clear();
+ }
+
+ /// \returns true if the memory operations A and B are consecutive.
+ bool isConsecutiveAccess(Value *A, Value *B);
+
+ /// \brief Perform LICM and CSE on the newly generated gather sequences.
+ void optimizeGatherSequence();
+private:
+ struct TreeEntry;
+
+ /// \returns the cost of the vectorizable entry.
+ int getEntryCost(TreeEntry *E);
+
+ /// This is the recursive part of buildTree.
+ void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth);
+
+ /// Vectorize a single entry in the tree.
+ Value *vectorizeTree(TreeEntry *E);
+
+ /// Vectorize a single entry in the tree, starting in \p VL.
+ Value *vectorizeTree(ArrayRef<Value *> VL);
+
+ /// \returns the pointer to the vectorized value if \p VL is already
+ /// vectorized, or NULL. This may happen because of cycles.
+ Value *alreadyVectorized(ArrayRef<Value *> VL) const;
+
+ /// \brief Take the pointer operand from the Load/Store instruction.
+ /// \returns NULL if this is not a valid Load/Store instruction.
+ static Value *getPointerOperand(Value *I);
+
+ /// \brief Take the address space operand from the Load/Store instruction.
+ /// \returns -1 if this is not a valid Load/Store instruction.
+ static unsigned getAddressSpaceOperand(Value *I);
+
+ /// \returns the scalarization cost for this type. Scalarization in this
+ /// context means the creation of vectors from a group of scalars.
+ int getGatherCost(Type *Ty);
+
+ /// \returns the scalarization cost for this list of values. Assuming that
+ /// this subtree gets vectorized, we may need to extract the values from the
+ /// roots. This method calculates the cost of extracting the values.
+ int getGatherCost(ArrayRef<Value *> VL);
+
+ /// \returns the AA location that is being accessed by the instruction.
+ AliasAnalysis::Location getLocation(Instruction *I);
+
+ /// \brief Checks if it is possible to sink an instruction from
+ /// \p Src to \p Dst.
+ /// \returns the pointer to the barrier instruction if we can't sink.
+ Value *getSinkBarrier(Instruction *Src, Instruction *Dst);
+
+ /// \returns the index of the last instruction in the BB from \p VL.
+ int getLastIndex(ArrayRef<Value *> VL);
+
+ /// \returns the last Instruction in the bundle \p VL.
+ Instruction *getLastInstruction(ArrayRef<Value *> VL);
+
+ /// \brief Set the Builder insert point to one after the last instruction in
+ /// the bundle
+ void setInsertPointAfterBundle(ArrayRef<Value *> VL);
+
+ /// \returns a vector from a collection of scalars in \p VL.
+ Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
+
+ /// \returns whether the VectorizableTree is fully vectorizable and will
+ /// be beneficial even when the tree height is tiny.
+ bool isFullyVectorizableTinyTree();
+
+ struct TreeEntry {
+ TreeEntry() : Scalars(), VectorizedValue(0), LastScalarIndex(0),
+ NeedToGather(0) {}
+
+ /// \returns true if the scalars in VL are equal to this entry.
+ bool isSame(ArrayRef<Value *> VL) const {
+ assert(VL.size() == Scalars.size() && "Invalid size");
+ return std::equal(VL.begin(), VL.end(), Scalars.begin());
+ }
+
+ /// A vector of scalars.
+ ValueList Scalars;
+
+ /// The Scalars are vectorized into this value. It is initialized to Null.
+ Value *VectorizedValue;
+
+ /// The index in the basic block of the last scalar.
+ int LastScalarIndex;
+
+ /// Do we need to gather this sequence?
+ bool NeedToGather;
+ };
+
+ /// Create a new VectorizableTree entry.
+ TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized) {
+ VectorizableTree.push_back(TreeEntry());
+ int idx = VectorizableTree.size() - 1;
+ TreeEntry *Last = &VectorizableTree[idx];
+ Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
+ Last->NeedToGather = !Vectorized;
+ if (Vectorized) {
+ Last->LastScalarIndex = getLastIndex(VL);
+ for (int i = 0, e = VL.size(); i != e; ++i) {
+ assert(!ScalarToTreeEntry.count(VL[i]) && "Scalar already in tree!");
+ ScalarToTreeEntry[VL[i]] = idx;
+ }
+ } else {
+ Last->LastScalarIndex = 0;
+ MustGather.insert(VL.begin(), VL.end());
+ }
+ return Last;
+ }
+
+ /// -- Vectorization State --
+ /// Holds all of the tree entries.
+ std::vector<TreeEntry> VectorizableTree;
+
+ /// Maps a specific scalar to its tree entry.
+ SmallDenseMap<Value*, int> ScalarToTreeEntry;
+
+ /// A list of scalars that we found that we need to keep as scalars.
+ ValueSet MustGather;
+
+ /// This POD struct describes one external user in the vectorized tree.
+ struct ExternalUser {
+ ExternalUser (Value *S, llvm::User *U, int L) :
+ Scalar(S), User(U), Lane(L){};
+ // Which scalar in our function.
+ Value *Scalar;
+ // Which user that uses the scalar.
+ llvm::User *User;
+ // Which lane does the scalar belong to.
+ int Lane;
+ };
+ typedef SmallVector<ExternalUser, 16> UserList;
+
+ /// A list of values that need to be extracted out of the tree.
+ /// This list holds pairs of (Internal Scalar : External User).
+ UserList ExternalUses;
+
+ /// A list of instructions to ignore while sinking
+ /// memory instructions. This map must be reset between runs of getCost.
+ ValueSet MemBarrierIgnoreList;
+
+ /// Holds all of the instructions that we gathered.
+ SetVector<Instruction *> GatherSeq;
+ /// A list of blocks that we are going to CSE.
+ SmallSet<BasicBlock *, 8> CSEBlocks;
+
+ /// Numbers instructions in different blocks.
+ DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers;
+
+ /// Reduction operators.
+ ValueSet *RdxOps;
+
+ // Analysis and block reference.
+ Function *F;
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ TargetTransformInfo *TTI;
+ AliasAnalysis *AA;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ /// Instruction builder to construct the vectorized tree.
+ IRBuilder<> Builder;
+};
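
A hedged sketch (not part of the patch) of how this interface composes, in the spirit of the pass's driver code further below; Threshold plays the role of SLPCostThreshold:

    static bool trySLPBundle(BoUpSLP &R, ArrayRef<Value *> Bundle,
                             int Threshold) {
      R.buildTree(Bundle);        // build the vectorizable tree from the roots
      int Cost = R.getTreeCost(); // negative means vectorization pays off
      if (Cost >= -Threshold)
        return false;
      R.vectorizeTree();          // emit vector code plus the needed extracts
      return true;
    }
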
+
+void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
+ deleteTree();
+ RdxOps = Rdx;
+ if (!getSameType(Roots))
+ return;
+ buildTree_rec(Roots, 0);
+
+ // Collect the values that we need to extract from the tree.
+ for (int EIdx = 0, EE = VectorizableTree.size(); EIdx < EE; ++EIdx) {
+ TreeEntry *Entry = &VectorizableTree[EIdx];
+
+ // For each lane:
+ for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
+ Value *Scalar = Entry->Scalars[Lane];
+
+ // No need to handle users of gathered values.
+ if (Entry->NeedToGather)
+ continue;
+
+ for (Value::use_iterator User = Scalar->use_begin(),
+ UE = Scalar->use_end(); User != UE; ++User) {
+ DEBUG(dbgs() << "SLP: Checking user:" << **User << ".\n");
+
+ // Skip in-tree scalars that become vectors.
+ if (ScalarToTreeEntry.count(*User)) {
+ DEBUG(dbgs() << "SLP: \tInternal user will be removed:" <<
+ **User << ".\n");
+ int Idx = ScalarToTreeEntry[*User]; (void) Idx;
+ assert(!VectorizableTree[Idx].NeedToGather && "Bad state");
+ continue;
+ }
+ Instruction *UserInst = dyn_cast<Instruction>(*User);
+ if (!UserInst)
+ continue;
+
+ // Ignore uses that are part of the reduction.
+ if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end())
+ continue;
+
+ DEBUG(dbgs() << "SLP: Need to extract:" << **User << " from lane " <<
+ Lane << " from " << *Scalar << ".\n");
+ ExternalUses.push_back(ExternalUser(Scalar, *User, Lane));
+ }
+ }
+ }
+}
+
+
+void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
+ bool SameTy = getSameType(VL); (void)SameTy;
+ assert(SameTy && "Invalid types!");
+
+ if (Depth == RecursionMaxDepth) {
+ DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+
+ // Don't handle vectors.
+ if (VL[0]->getType()->isVectorTy()) {
+ DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ if (SI->getValueOperand()->getType()->isVectorTy()) {
+ DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+
+ // If all of the operands are identical or constant we have a simple solution.
+ if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) ||
+ !getSameOpcode(VL)) {
+ DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
+ newTreeEntry(VL, false);
+ return;
+ }
+
+ // We now know that this is a vector of instructions of the same type from
+ // the same block.
+
+ // Check if this is a duplicate of another entry.
+ if (ScalarToTreeEntry.count(VL[0])) {
+ int Idx = ScalarToTreeEntry[VL[0]];
+ TreeEntry *E = &VectorizableTree[Idx];
+ for (unsigned i = 0, e = VL.size(); i != e; ++i) {
+ DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n");
+ if (E->Scalars[i] != VL[i]) {
+ DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+ DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *VL[0] << ".\n");
+ return;
+ }
+
+ // Check that none of the instructions in the bundle are already in the tree.
+ for (unsigned i = 0, e = VL.size(); i != e; ++i) {
+ if (ScalarToTreeEntry.count(VL[i])) {
+ DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
+ ") is already in tree.\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+
+ // If any of the scalars appears in the table OR it is marked as a value that
+ // needs to stay scalar, then we need to gather the scalars.
+ for (unsigned i = 0, e = VL.size(); i != e; ++i) {
+ if (ScalarToTreeEntry.count(VL[i]) || MustGather.count(VL[i])) {
+ DEBUG(dbgs() << "SLP: Gathering due to gathered scalar. \n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+
+ // Check that all of the users of the scalars that we want to vectorize are
+ // schedulable.
+ Instruction *VL0 = cast<Instruction>(VL[0]);
+ int MyLastIndex = getLastIndex(VL);
+ BasicBlock *BB = cast<Instruction>(VL0)->getParent();
+
+ for (unsigned i = 0, e = VL.size(); i != e; ++i) {
+ Instruction *Scalar = cast<Instruction>(VL[i]);
+ DEBUG(dbgs() << "SLP: Checking users of " << *Scalar << ". \n");
+ for (Value::use_iterator U = Scalar->use_begin(), UE = Scalar->use_end();
+ U != UE; ++U) {
+ DEBUG(dbgs() << "SLP: \tUser " << **U << ". \n");
+ Instruction *User = dyn_cast<Instruction>(*U);
+ if (!User) {
+ DEBUG(dbgs() << "SLP: Gathering due unknown user. \n");
+ newTreeEntry(VL, false);
+ return;
+ }
+
+ // We don't care if the user is in a different basic block.
+ BasicBlock *UserBlock = User->getParent();
+ if (UserBlock != BB) {
+ DEBUG(dbgs() << "SLP: User from a different basic block "
+ << *User << ". \n");
+ continue;
+ }
+
+ // If this is a PHINode within this basic block then we can place the
+ // extract wherever we want.
+ if (isa<PHINode>(*User)) {
+ DEBUG(dbgs() << "SLP: \tWe can schedule PHIs:" << *User << ". \n");
+ continue;
+ }
+
+ // Check if this is a safe in-tree user.
+ if (ScalarToTreeEntry.count(User)) {
+ int Idx = ScalarToTreeEntry[User];
+ int VecLocation = VectorizableTree[Idx].LastScalarIndex;
+ if (VecLocation <= MyLastIndex) {
+ DEBUG(dbgs() << "SLP: Gathering due to unschedulable vector. \n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ DEBUG(dbgs() << "SLP: In-tree user (" << *User << ") at #" <<
+ VecLocation << " vector value (" << *Scalar << ") at #"
+ << MyLastIndex << ".\n");
+ continue;
+ }
+
+ // This user is part of the reduction.
+ if (RdxOps && RdxOps->count(User))
+ continue;
+
+ // Make sure that we can schedule this unknown user.
+ BlockNumbering &BN = BlocksNumbers[BB];
+ int UserIndex = BN.getIndex(User);
+ if (UserIndex < MyLastIndex) {
+
+ DEBUG(dbgs() << "SLP: Can't schedule extractelement for "
+ << *User << ". \n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+ }
+
+ // Check that every instruction appears only once in this bundle.
+ for (unsigned i = 0, e = VL.size(); i < e; ++i)
+ for (unsigned j = i+1; j < e; ++j)
+ if (VL[i] == VL[j]) {
+ DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+
+ // Check that instructions in this bundle don't reference each other.
+ // The runtime of this check is O(N^2 * uses(N)) and a typical N is 4.
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ for (Value::use_iterator U = VL[i]->use_begin(), UE = VL[i]->use_end();
+ U != UE; ++U) {
+ for (unsigned j = 0; j < e; ++j) {
+ if (i != j && *U == VL[j]) {
+ DEBUG(dbgs() << "SLP: Intra-bundle dependencies!" << **U << ". \n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
+
+ unsigned Opcode = getSameOpcode(VL);
+
+ // Check if it is safe to sink the loads or the stores.
+ if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
+ Instruction *Last = getLastInstruction(VL);
+
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ if (VL[i] == Last)
+ continue;
+ Value *Barrier = getSinkBarrier(cast<Instruction>(VL[i]), Last);
+ if (Barrier) {
+ DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << *Last
+ << "\n because of " << *Barrier << ". Gathering.\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+ }
+
+ switch (Opcode) {
+ case Instruction::PHI: {
+ PHINode *PH = dyn_cast<PHINode>(VL0);
+
+ // Check for terminator values (e.g. invoke).
+ for (unsigned j = 0; j < VL.size(); ++j)
+ for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
+ TerminatorInst *Term = dyn_cast<TerminatorInst>(cast<PHINode>(VL[j])->getIncomingValue(i));
+ if (Term) {
+ DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+
+ newTreeEntry(VL, true);
+ DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
+
+ for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<PHINode>(VL[j])->getIncomingValue(i));
+
+ buildTree_rec(Operands, Depth + 1);
+ }
+ return;
+ }
+ case Instruction::ExtractElement: {
+ bool Reuse = CanReuseExtract(VL);
+ if (Reuse) {
+ DEBUG(dbgs() << "SLP: Reusing extract sequence.\n");
+ }
+ newTreeEntry(VL, Reuse);
+ return;
+ }
+ case Instruction::Load: {
+ // Check if the loads are consecutive or if we need to swizzle them.
+ for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
+ LoadInst *L = cast<LoadInst>(VL[i]);
+ if (!L->isSimple() || !isConsecutiveAccess(VL[i], VL[i + 1])) {
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: Need to swizzle loads.\n");
+ return;
+ }
+ }
+ newTreeEntry(VL, true);
+ DEBUG(dbgs() << "SLP: added a vector of loads.\n");
+ return;
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ Type *SrcTy = VL0->getOperand(0)->getType();
+ for (unsigned i = 0; i < VL.size(); ++i) {
+ Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
+ if (Ty != SrcTy || Ty->isAggregateType() || Ty->isVectorTy()) {
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n");
+ return;
+ }
+ }
+ newTreeEntry(VL, true);
+ DEBUG(dbgs() << "SLP: added a vector of casts.\n");
+
+ for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+ buildTree_rec(Operands, Depth+1);
+ }
+ return;
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ // Check that all of the compares have the same predicate.
+ CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
+ Type *ComparedTy = cast<Instruction>(VL[0])->getOperand(0)->getType();
+ for (unsigned i = 1, e = VL.size(); i < e; ++i) {
+ CmpInst *Cmp = cast<CmpInst>(VL[i]);
+ if (Cmp->getPredicate() != P0 ||
+ Cmp->getOperand(0)->getType() != ComparedTy) {
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
+ return;
+ }
+ }
+
+ newTreeEntry(VL, true);
+ DEBUG(dbgs() << "SLP: added a vector of compares.\n");
+
+ for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+ buildTree_rec(Operands, Depth+1);
+ }
+ return;
+ }
+ case Instruction::Select:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ newTreeEntry(VL, true);
+ DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
+
+ // Sort operands of the instructions so that each side is more likely to
+ // have the same opcode.
+ if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
+ ValueList Left, Right;
+ reorderInputsAccordingToOpcode(VL, Left, Right);
+ buildTree_rec(Left, Depth + 1);
+ buildTree_rec(Right, Depth + 1);
+ return;
+ }
+
+ for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+ buildTree_rec(Operands, Depth+1);
+ }
+ return;
+ }
+ case Instruction::Store: {
+ // Check if the stores are consecutive or if we need to swizzle them.
+ for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
+ if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: Non consecutive store.\n");
+ return;
+ }
+
+ newTreeEntry(VL, true);
+ DEBUG(dbgs() << "SLP: added a vector of stores.\n");
+
+ ValueList Operands;
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+
+ // We can ignore these values because we are sinking them down.
+ MemBarrierIgnoreList.insert(VL.begin(), VL.end());
+ buildTree_rec(Operands, Depth + 1);
+ return;
+ }
+ default:
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
+ return;
+ }
+}
+
+int BoUpSLP::getEntryCost(TreeEntry *E) {
+ ArrayRef<Value*> VL = E->Scalars;
+
+ Type *ScalarTy = VL[0]->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+ VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+
+ if (E->NeedToGather) {
+ if (allConstant(VL))
+ return 0;
+ if (isSplat(VL)) {
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
+ }
+ return getGatherCost(E->Scalars);
+ }
+
+ assert(getSameOpcode(VL) && getSameType(VL) && getSameBlock(VL) &&
+ "Invalid VL");
+ Instruction *VL0 = cast<Instruction>(VL[0]);
+ unsigned Opcode = VL0->getOpcode();
+ switch (Opcode) {
+ case Instruction::PHI: {
+ return 0;
+ }
+ case Instruction::ExtractElement: {
+ if (CanReuseExtract(VL))
+ return 0;
+ return getGatherCost(VecTy);
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ Type *SrcTy = VL0->getOperand(0)->getType();
+
+ // Calculate the cost of this instruction.
+ int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
+ VL0->getType(), SrcTy);
+
+ VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
+ int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
+ return VecCost - ScalarCost;
+ }
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ case Instruction::Select:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // Calculate the cost of this instruction.
+ int ScalarCost = 0;
+ int VecCost = 0;
+ if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp ||
+ Opcode == Instruction::Select) {
+ VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
+ ScalarCost = VecTy->getNumElements() *
+ TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());
+ VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);
+ } else {
+ // Certain instructions can be cheaper to vectorize if they have a
+ // constant second vector operand.
+ TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TargetTransformInfo::OK_UniformConstantValue;
+
+ // Check whether all second operands are constant.
+ for (unsigned i = 0; i < VL.size(); ++i)
+ if (!isa<ConstantInt>(cast<Instruction>(VL[i])->getOperand(1))) {
+ Op2VK = TargetTransformInfo::OK_AnyValue;
+ break;
+ }
+
+ ScalarCost =
+ VecTy->getNumElements() *
+ TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK);
+ VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK);
+ }
+ return VecCost - ScalarCost;
+ }
+ case Instruction::Load: {
+ // Cost of wide load - cost of scalar loads.
+ int ScalarLdCost = VecTy->getNumElements() *
+ TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
+ int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
+ return VecLdCost - ScalarLdCost;
+ }
+ case Instruction::Store: {
+ // We know that we can merge the stores. Calculate the cost.
+ int ScalarStCost = VecTy->getNumElements() *
+ TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
+ int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
+ return VecStCost - ScalarStCost;
+ }
+ default:
+ llvm_unreachable("Unknown instruction");
+ }
+}
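
Every case above reduces to the same "VecCost - ScalarCost" pattern; a toy instance with invented TTI numbers:

    #include <cassert>

    int main() {
      const int NumElements = 4;   // bundle width
      const int ScalarAddCost = 1; // invented per-instruction cost
      const int VectorAddCost = 1; // one wide add
      int ScalarCost = NumElements * ScalarAddCost;
      int VecCost = VectorAddCost;
      assert(VecCost - ScalarCost == -3); // negative: the bundle pays off
      return 0;
    }
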
+
+bool BoUpSLP::isFullyVectorizableTinyTree() {
+ DEBUG(dbgs() << "SLP: Check whether the tree with height " <<
+ VectorizableTree.size() << " is fully vectorizable .\n");
+
+ // We only handle trees of height 2.
+ if (VectorizableTree.size() != 2)
+ return false;
+
+ // Gathering cost would be too much for tiny trees.
+ if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather)
+ return false;
+
+ return true;
+}
+
+int BoUpSLP::getTreeCost() {
+ int Cost = 0;
+ DEBUG(dbgs() << "SLP: Calculating cost for tree of size " <<
+ VectorizableTree.size() << ".\n");
+
+ // We only vectorize tiny trees if they are fully vectorizable.
+ if (VectorizableTree.size() < 3 && !isFullyVectorizableTinyTree()) {
+ if (!VectorizableTree.size()) {
+ assert(!ExternalUses.size() && "We should not have any external users");
+ }
+ return INT_MAX;
+ }
+
+ unsigned BundleWidth = VectorizableTree[0].Scalars.size();
+
+ for (unsigned i = 0, e = VectorizableTree.size(); i != e; ++i) {
+ int C = getEntryCost(&VectorizableTree[i]);
+ DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with "
+ << *VectorizableTree[i].Scalars[0] << " .\n");
+ Cost += C;
+ }
+
+ int ExtractCost = 0;
+ for (UserList::iterator I = ExternalUses.begin(), E = ExternalUses.end();
+ I != E; ++I) {
+
+ VectorType *VecTy = VectorType::get(I->Scalar->getType(), BundleWidth);
+ ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
+ I->Lane);
+ }
+
+
+ DEBUG(dbgs() << "SLP: Total Cost " << Cost + ExtractCost<< ".\n");
+ return Cost + ExtractCost;
+}
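
The final number is just the sum of the per-bundle costs plus the per-lane extract costs; a toy tally with made-up values:

    #include <cassert>
    #include <numeric>
    #include <vector>

    int main() {
      std::vector<int> BundleCosts = {-4, -2, 1}; // one per tree entry
      std::vector<int> ExtractCosts = {1, 1};     // one per external user
      int Cost = std::accumulate(BundleCosts.begin(), BundleCosts.end(), 0);
      int Extract =
          std::accumulate(ExtractCosts.begin(), ExtractCosts.end(), 0);
      assert(Cost + Extract == -3); // still profitable overall
      return 0;
    }
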
+
+int BoUpSLP::getGatherCost(Type *Ty) {
+ int Cost = 0;
+ for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
+ Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ return Cost;
+}
+
+int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
+ // Find the type of the operands in VL.
+ Type *ScalarTy = VL[0]->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+ VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+ // Find the cost of inserting/extracting values from the vector.
+ return getGatherCost(VecTy);
+}
+
+AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return AA->getLocation(SI);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return AA->getLocation(LI);
+ return AliasAnalysis::Location();
+}
+
+Value *BoUpSLP::getPointerOperand(Value *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getPointerOperand();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ return 0;
+}
+
+unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
+ if (LoadInst *L = dyn_cast<LoadInst>(I))
+ return L->getPointerAddressSpace();
+ if (StoreInst *S = dyn_cast<StoreInst>(I))
+ return S->getPointerAddressSpace();
+ return -1;
+}
+
+bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
+ Value *PtrA = getPointerOperand(A);
+ Value *PtrB = getPointerOperand(B);
+ unsigned ASA = getAddressSpaceOperand(A);
+ unsigned ASB = getAddressSpaceOperand(B);
+
+ // Check that the address spaces match and that the pointers are valid.
+ if (!PtrA || !PtrB || (ASA != ASB))
+ return false;
+
+ // Make sure that A and B are different pointers of the same type.
+ if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
+ return false;
+
+ unsigned PtrBitWidth = DL->getPointerSizeInBits(ASA);
+ Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+ APInt Size(PtrBitWidth, DL->getTypeStoreSize(Ty));
+
+ APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
+ PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetA);
+ PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetB);
+
+ APInt OffsetDelta = OffsetB - OffsetA;
+
+ // Check if they are based on the same pointer. That makes the offsets
+ // sufficient.
+ if (PtrA == PtrB)
+ return OffsetDelta == Size;
+
+ // Compute the base pointer delta needed to make the final delta equal
+ // to the size.
+ APInt BaseDelta = Size - OffsetDelta;
+
+ // Otherwise compute the distance with SCEV between the base pointers.
+ const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
+ const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
+ const SCEV *C = SE->getConstant(BaseDelta);
+ const SCEV *X = SE->getAddExpr(PtrSCEVA, C);
+ return X == PtrSCEVB;
+}
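
The same-base fast path boils down to simple offset arithmetic; a standalone sketch (sizes and offsets invented):

    #include <cassert>
    #include <cstdint>

    // Two accesses off one base pointer are consecutive exactly when their
    // constant offsets differ by the element's store size.
    static bool consecutiveSameBase(int64_t OffsetA, int64_t OffsetB,
                                    int64_t Size) {
      return OffsetB - OffsetA == Size;
    }

    int main() {
      assert(consecutiveSameBase(0, 4, 4));  // A[i] then A[i+1] for i32
      assert(!consecutiveSameBase(0, 8, 4)); // a one-element gap
      return 0;
    }
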
+
+Value *BoUpSLP::getSinkBarrier(Instruction *Src, Instruction *Dst) {
+ assert(Src->getParent() == Dst->getParent() && "Not the same BB");
+ BasicBlock::iterator I = Src, E = Dst;
+ /// Scan all of the instructions from SRC to DST and check if
+ /// the source may alias.
+ for (++I; I != E; ++I) {
+ // Ignore store instructions that are marked as 'ignore'.
+ if (MemBarrierIgnoreList.count(I))
+ continue;
+ if (Src->mayWriteToMemory()) /* Write */ {
+ if (!I->mayReadOrWriteMemory())
+ continue;
+ } else /* Read */ {
+ if (!I->mayWriteToMemory())
+ continue;
+ }
+ AliasAnalysis::Location A = getLocation(&*I);
+ AliasAnalysis::Location B = getLocation(Src);
+
+ if (!A.Ptr || !B.Ptr || AA->alias(A, B))
+ return I;
+ }
+ return 0;
+}
+
+int BoUpSLP::getLastIndex(ArrayRef<Value *> VL) {
+ BasicBlock *BB = cast<Instruction>(VL[0])->getParent();
+ assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block");
+ BlockNumbering &BN = BlocksNumbers[BB];
+
+ int MaxIdx = BN.getIndex(BB->getFirstNonPHI());
+ for (unsigned i = 0, e = VL.size(); i < e; ++i)
+ MaxIdx = std::max(MaxIdx, BN.getIndex(cast<Instruction>(VL[i])));
+ return MaxIdx;
+}
+
+Instruction *BoUpSLP::getLastInstruction(ArrayRef<Value *> VL) {
+ BasicBlock *BB = cast<Instruction>(VL[0])->getParent();
+ assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block");
+ BlockNumbering &BN = BlocksNumbers[BB];
+
+ int MaxIdx = BN.getIndex(cast<Instruction>(VL[0]));
+ for (unsigned i = 1, e = VL.size(); i < e; ++i)
+ MaxIdx = std::max(MaxIdx, BN.getIndex(cast<Instruction>(VL[i])));
+ Instruction *I = BN.getInstruction(MaxIdx);
+ assert(I && "bad location");
+ return I;
+}
+
+void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
+ Instruction *VL0 = cast<Instruction>(VL[0]);
+ Instruction *LastInst = getLastInstruction(VL);
+ BasicBlock::iterator NextInst = LastInst;
+ ++NextInst;
+ Builder.SetInsertPoint(VL0->getParent(), NextInst);
+ Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+}
+
+Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
+ Value *Vec = UndefValue::get(Ty);
+ // Generate the 'InsertElement' instruction.
+ for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
+ Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
+ if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) {
+ GatherSeq.insert(Insrt);
+ CSEBlocks.insert(Insrt->getParent());
+
+ // Add to our 'need-to-extract' list.
+ if (ScalarToTreeEntry.count(VL[i])) {
+ int Idx = ScalarToTreeEntry[VL[i]];
+ TreeEntry *E = &VectorizableTree[Idx];
+ // Find which lane we need to extract.
+ int FoundLane = -1;
+ for (unsigned Lane = 0, LE = VL.size(); Lane != LE; ++Lane) {
+ // Is this the lane of the scalar that we are looking for ?
+ if (E->Scalars[Lane] == VL[i]) {
+ FoundLane = Lane;
+ break;
+ }
+ }
+ assert(FoundLane >= 0 && "Could not find the correct lane");
+ ExternalUses.push_back(ExternalUser(VL[i], Insrt, FoundLane));
+ }
+ }
+ }
+
+ return Vec;
+}
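
The gather itself is one insert per lane into an initially-undef vector; a standalone picture with a std::vector standing in for the IR value:

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> Scalars = {3, 1, 4, 1};
      std::vector<int> Vec(Scalars.size(), 0); // starts out as "undef"
      for (unsigned i = 0; i < Scalars.size(); ++i)
        Vec[i] = Scalars[i]; // one insertelement per lane
      assert(Vec[2] == 4);
      return 0;
    }
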
+
+Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
+ SmallDenseMap<Value*, int>::const_iterator Entry
+ = ScalarToTreeEntry.find(VL[0]);
+ if (Entry != ScalarToTreeEntry.end()) {
+ int Idx = Entry->second;
+ const TreeEntry *En = &VectorizableTree[Idx];
+ if (En->isSame(VL) && En->VectorizedValue)
+ return En->VectorizedValue;
+ }
+ return 0;
+}
+
+Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
+ if (ScalarToTreeEntry.count(VL[0])) {
+ int Idx = ScalarToTreeEntry[VL[0]];
+ TreeEntry *E = &VectorizableTree[Idx];
+ if (E->isSame(VL))
+ return vectorizeTree(E);
+ }
+
+ Type *ScalarTy = VL[0]->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+ VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+
+ return Gather(VL, VecTy);
+}
+
+Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+
+ if (E->VectorizedValue) {
+ DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n");
+ return E->VectorizedValue;
+ }
+
+ Instruction *VL0 = cast<Instruction>(E->Scalars[0]);
+ Type *ScalarTy = VL0->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
+ ScalarTy = SI->getValueOperand()->getType();
+ VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size());
+
+ if (E->NeedToGather) {
+ setInsertPointAfterBundle(E->Scalars);
+ return Gather(E->Scalars, VecTy);
+ }
+
+ unsigned Opcode = VL0->getOpcode();
+ assert(Opcode == getSameOpcode(E->Scalars) && "Invalid opcode");
+
+ switch (Opcode) {
+ case Instruction::PHI: {
+ PHINode *PH = dyn_cast<PHINode>(VL0);
+ Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
+ Builder.SetCurrentDebugLocation(PH->getDebugLoc());
+ PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
+ E->VectorizedValue = NewPhi;
+
+ // PHINodes may have multiple entries from the same block. We want to
+ // visit every block once.
+ SmallSet<BasicBlock*, 4> VisitedBBs;
+
+ for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
+ ValueList Operands;
+ BasicBlock *IBB = PH->getIncomingBlock(i);
+
+ if (!VisitedBBs.insert(IBB)) {
+ NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB);
+ continue;
+ }
+
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < E->Scalars.size(); ++j)
+ Operands.push_back(cast<PHINode>(E->Scalars[j])->
+ getIncomingValueForBlock(IBB));
+
+ Builder.SetInsertPoint(IBB->getTerminator());
+ Builder.SetCurrentDebugLocation(PH->getDebugLoc());
+ Value *Vec = vectorizeTree(Operands);
+ NewPhi->addIncoming(Vec, IBB);
+ }
+
+ assert(NewPhi->getNumIncomingValues() == PH->getNumIncomingValues() &&
+ "Invalid number of incoming values");
+ return NewPhi;
+ }
+
+ case Instruction::ExtractElement: {
+ if (CanReuseExtract(E->Scalars)) {
+ Value *V = VL0->getOperand(0);
+ E->VectorizedValue = V;
+ return V;
+ }
+ return Gather(E->Scalars, VecTy);
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ ValueList INVL;
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i)
+ INVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
+
+ setInsertPointAfterBundle(E->Scalars);
+
+ Value *InVec = vectorizeTree(INVL);
+
+ if (Value *V = alreadyVectorized(E->Scalars))
+ return V;
+
+ CastInst *CI = dyn_cast<CastInst>(VL0);
+ Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
+ E->VectorizedValue = V;
+ return V;
+ }
+ case Instruction::FCmp:
+ case Instruction::ICmp: {
+ ValueList LHSV, RHSV;
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
+ LHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
+ RHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
+ }
+
+ setInsertPointAfterBundle(E->Scalars);
+
+ Value *L = vectorizeTree(LHSV);
+ Value *R = vectorizeTree(RHSV);
+
+ if (Value *V = alreadyVectorized(E->Scalars))
+ return V;
+
+ CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
+ Value *V;
+ if (Opcode == Instruction::FCmp)
+ V = Builder.CreateFCmp(P0, L, R);
+ else
+ V = Builder.CreateICmp(P0, L, R);
+
+ E->VectorizedValue = V;
+ return V;
+ }
+ case Instruction::Select: {
+ ValueList TrueVec, FalseVec, CondVec;
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
+ CondVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
+ TrueVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
+ FalseVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(2));
+ }
+
+ setInsertPointAfterBundle(E->Scalars);
+
+ Value *Cond = vectorizeTree(CondVec);
+ Value *True = vectorizeTree(TrueVec);
+ Value *False = vectorizeTree(FalseVec);
+
+ if (Value *V = alreadyVectorized(E->Scalars))
+ return V;
+
+ Value *V = Builder.CreateSelect(Cond, True, False);
+ E->VectorizedValue = V;
+ return V;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ ValueList LHSVL, RHSVL;
+ if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
+ reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
+ else
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
+ LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
+ RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
+ }
+
+ setInsertPointAfterBundle(E->Scalars);
+
+ Value *LHS = vectorizeTree(LHSVL);
+ Value *RHS = vectorizeTree(RHSVL);
+
+ if (LHS == RHS && isa<Instruction>(LHS)) {
+ assert((VL0->getOperand(0) == VL0->getOperand(1)) && "Invalid order");
+ }
+
+ if (Value *V = alreadyVectorized(E->Scalars))
+ return V;
+
+ BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
+ Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
+ E->VectorizedValue = V;
+
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return propagateMetadata(I, E->Scalars);
+
+ return V;
+ }
+ case Instruction::Load: {
+ // Loads are inserted at the head of the tree because we don't want to
+ // sink them all the way down past store instructions.
+ setInsertPointAfterBundle(E->Scalars);
+
+ LoadInst *LI = cast<LoadInst>(VL0);
+ unsigned AS = LI->getPointerAddressSpace();
+
+ Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
+ VecTy->getPointerTo(AS));
+ unsigned Alignment = LI->getAlignment();
+ LI = Builder.CreateLoad(VecPtr);
+ LI->setAlignment(Alignment);
+ E->VectorizedValue = LI;
+ return propagateMetadata(LI, E->Scalars);
+ }
+ case Instruction::Store: {
+ StoreInst *SI = cast<StoreInst>(VL0);
+ unsigned Alignment = SI->getAlignment();
+ unsigned AS = SI->getPointerAddressSpace();
+
+ ValueList ValueOp;
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i)
+ ValueOp.push_back(cast<StoreInst>(E->Scalars[i])->getValueOperand());
+
+ setInsertPointAfterBundle(E->Scalars);
+
+ Value *VecValue = vectorizeTree(ValueOp);
+ Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
+ VecTy->getPointerTo(AS));
+ StoreInst *S = Builder.CreateStore(VecValue, VecPtr);
+ S->setAlignment(Alignment);
+ E->VectorizedValue = S;
+ return propagateMetadata(S, E->Scalars);
+ }
+ default:
+ llvm_unreachable("unknown inst");
+ }
+ return 0;
+}
+
+Value *BoUpSLP::vectorizeTree() {
+ Builder.SetInsertPoint(F->getEntryBlock().begin());
+ vectorizeTree(&VectorizableTree[0]);
+
+ DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n");
+
+ // Extract all of the elements with the external uses.
+ for (UserList::iterator it = ExternalUses.begin(), e = ExternalUses.end();
+ it != e; ++it) {
+ Value *Scalar = it->Scalar;
+ llvm::User *User = it->User;
+
+ // Skip users that we already RAUW. This happens when one instruction
+ // has multiple uses of the same value.
+ if (std::find(Scalar->use_begin(), Scalar->use_end(), User) ==
+ Scalar->use_end())
+ continue;
+ assert(ScalarToTreeEntry.count(Scalar) && "Invalid scalar");
+
+ int Idx = ScalarToTreeEntry[Scalar];
+ TreeEntry *E = &VectorizableTree[Idx];
+ assert(!E->NeedToGather && "Extracting from a gather list");
+
+ Value *Vec = E->VectorizedValue;
+ assert(Vec && "Can't find vectorizable value");
+
+ Value *Lane = Builder.getInt32(it->Lane);
+ // Generate extracts for out-of-tree users.
+ // Find the insertion point for the extractelement lane.
+ if (PHINode *PN = dyn_cast<PHINode>(Vec)) {
+ Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt());
+ Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ CSEBlocks.insert(PN->getParent());
+ User->replaceUsesOfWith(Scalar, Ex);
+ } else if (isa<Instruction>(Vec)){
+ if (PHINode *PH = dyn_cast<PHINode>(User)) {
+ for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) {
+ if (PH->getIncomingValue(i) == Scalar) {
+ Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
+ Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ CSEBlocks.insert(PH->getIncomingBlock(i));
+ PH->setOperand(i, Ex);
+ }
+ }
+ } else {
+ Builder.SetInsertPoint(cast<Instruction>(User));
+ Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ CSEBlocks.insert(cast<Instruction>(User)->getParent());
+ User->replaceUsesOfWith(Scalar, Ex);
+ }
+ } else {
+ Builder.SetInsertPoint(F->getEntryBlock().begin());
+ Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ CSEBlocks.insert(&F->getEntryBlock());
+ User->replaceUsesOfWith(Scalar, Ex);
+ }
+
+ DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n");
+ }
+
+ // For each vectorized value:
+ for (int EIdx = 0, EE = VectorizableTree.size(); EIdx < EE; ++EIdx) {
+ TreeEntry *Entry = &VectorizableTree[EIdx];
+
+ // For each lane:
+ for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
+ Value *Scalar = Entry->Scalars[Lane];
+
+ // No need to handle users of gathered values.
+ if (Entry->NeedToGather)
+ continue;
+
+ assert(Entry->VectorizedValue && "Can't find vectorizable value");
+
+ Type *Ty = Scalar->getType();
+ if (!Ty->isVoidTy()) {
+ for (Value::use_iterator User = Scalar->use_begin(),
+ UE = Scalar->use_end(); User != UE; ++User) {
+ DEBUG(dbgs() << "SLP: \tvalidating user:" << **User << ".\n");
+
+ assert((ScalarToTreeEntry.count(*User) ||
+ // It is legal to replace the reduction users by undef.
+ (RdxOps && RdxOps->count(*User))) &&
+ "Replacing out-of-tree value with undef");
+ }
+ Value *Undef = UndefValue::get(Ty);
+ Scalar->replaceAllUsesWith(Undef);
+ }
+ DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
+ cast<Instruction>(Scalar)->eraseFromParent();
+ }
+ }
+
+ for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) {
+ BlocksNumbers[it].forget();
+ }
+ Builder.ClearInsertionPoint();
+
+ return VectorizableTree[0].VectorizedValue;
+}
+
+class DTCmp {
+ const DominatorTree *DT;
+
+public:
+ DTCmp(const DominatorTree *DT) : DT(DT) {}
+ bool operator()(const BasicBlock *A, const BasicBlock *B) const {
+ return DT->properlyDominates(A, B);
+ }
+};
+
+void BoUpSLP::optimizeGatherSequence() {
+ DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
+ << " gather sequences instructions.\n");
+ // LICM InsertElementInst sequences.
+ for (SetVector<Instruction *>::iterator it = GatherSeq.begin(),
+ e = GatherSeq.end(); it != e; ++it) {
+ InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it);
+
+ if (!Insert)
+ continue;
+
+ // Check if this block is inside a loop.
+ Loop *L = LI->getLoopFor(Insert->getParent());
+ if (!L)
+ continue;
+
+ // Check if it has a preheader.
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ if (!PreHeader)
+ continue;
+
+ // If the vector or the element that we insert into it are
+ // instructions that are defined in this basic block then we can't
+ // hoist this instruction.
+ Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
+ Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
+ if (CurrVec && L->contains(CurrVec))
+ continue;
+ if (NewElem && L->contains(NewElem))
+ continue;
+
+ // We can hoist this instruction. Move it to the pre-header.
+ Insert->moveBefore(PreHeader->getTerminator());
+ }
+
+ // Sort blocks by domination. This ensures we visit a block after all blocks
+ // dominating it are visited.
+ SmallVector<BasicBlock *, 8> CSEWorkList(CSEBlocks.begin(), CSEBlocks.end());
+ std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), DTCmp(DT));
+
+ // Perform O(N^2) search over the gather sequences and merge identical
+ // instructions. TODO: We can further optimize this scan if we split the
+ // instructions into different buckets based on the insert lane.
+ SmallVector<Instruction *, 16> Visited;
+ for (SmallVectorImpl<BasicBlock *>::iterator I = CSEWorkList.begin(),
+ E = CSEWorkList.end();
+ I != E; ++I) {
+ assert((I == CSEWorkList.begin() || !DT->dominates(*I, *llvm::prior(I))) &&
+ "Worklist not sorted properly!");
+ BasicBlock *BB = *I;
+ // For all instructions in blocks containing gather sequences:
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
+ Instruction *In = it++;
+ if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
+ continue;
+
+ // Check if we can replace this instruction with any of the
+ // visited instructions.
+ for (SmallVectorImpl<Instruction *>::iterator v = Visited.begin(),
+ ve = Visited.end();
+ v != ve; ++v) {
+ if (In->isIdenticalTo(*v) &&
+ DT->dominates((*v)->getParent(), In->getParent())) {
+ In->replaceAllUsesWith(*v);
+ In->eraseFromParent();
+ In = 0;
+ break;
+ }
+ }
+ if (In) {
+ assert(std::find(Visited.begin(), Visited.end(), In) == Visited.end());
+ Visited.push_back(In);
+ }
+ }
+ }
+ CSEBlocks.clear();
+ GatherSeq.clear();
+}
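
The second half of optimizeGatherSequence is a first-occurrence-wins scan: each insert/extract is compared against everything already visited and replaced when an identical, dominating copy exists. A minimal standard-C++ sketch of that scan, with strings standing in for instructions and dominance ignored:

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      // Four "instructions"; the last two duplicate the first two.
      std::vector<std::string> Seq = {"ins a", "ins b", "ins a", "ins b"};
      std::vector<std::string> Visited;
      for (const std::string &In : Seq) {
        bool Replaced = false;
        for (const std::string &V : Visited)
          if (In == V) { Replaced = true; break; } // reuse the earlier copy
        if (!Replaced)
          Visited.push_back(In);                   // first occurrence: keep it
        std::printf("%s -> %s\n", In.c_str(), Replaced ? "CSE'd" : "kept");
      }
    }
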
+
/// The SLPVectorizer Pass.
struct SLPVectorizer : public FunctionPass {
- typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
+ typedef SmallVector<StoreInst *, 8> StoreList;
+ typedef MapVector<Value *, StoreList> StoreListMap;
/// Pass identification, replacement for typeid
static char ID;
@@ -61,6 +1764,7 @@ struct SLPVectorizer : public FunctionPass {
TargetTransformInfo *TTI;
AliasAnalysis *AA;
LoopInfo *LI;
+ DominatorTree *DT;
virtual bool runOnFunction(Function &F) {
SE = &getAnalysis<ScalarEvolution>();
@@ -68,41 +1772,50 @@ struct SLPVectorizer : public FunctionPass {
TTI = &getAnalysis<TargetTransformInfo>();
AA = &getAnalysis<AliasAnalysis>();
LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTree>();
StoreRefs.clear();
bool Changed = false;
+ // If the target claims to have no vector registers, don't attempt
+ // vectorization.
+ if (!TTI->getNumberOfRegisters(true))
+ return false;
+
// Must have DataLayout. We can't require it because some tests run w/o
// triple.
if (!DL)
return false;
- for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
- BasicBlock *BB = it;
- bool BBChanged = false;
+ // Don't vectorize when the attribute NoImplicitFloat is used.
+ if (F.hasFnAttribute(Attribute::NoImplicitFloat))
+ return false;
- // Use the bollom up slp vectorizer to construct chains that start with
- // he store instructions.
- BoUpSLP R(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));
+ DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
- // Vectorize trees that end at reductions.
- BBChanged |= vectorizeReductions(BB, R);
+ // Use the bottom-up SLP vectorizer to construct chains that start with
+ // the store instructions.
+ BoUpSLP R(&F, SE, DL, TTI, AA, LI, DT);
+
+ // Scan the blocks in the function in post order.
+ for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
+ e = po_end(&F.getEntryBlock()); it != e; ++it) {
+ BasicBlock *BB = *it;
// Vectorize trees that end at stores.
if (unsigned count = collectStores(BB, R)) {
(void)count;
- DEBUG(dbgs()<<"SLP: Found " << count << " stores to vectorize.\n");
- BBChanged |= vectorizeStoreChains(R);
+ DEBUG(dbgs() << "SLP: Found " << count << " stores to vectorize.\n");
+ Changed |= vectorizeStoreChains(R);
}
- // Try to hoist some of the scalarization code to the preheader.
- if (BBChanged) hoistGatherSequence(LI, BB, R);
-
- Changed |= BBChanged;
+ // Vectorize trees that end at reductions.
+ Changed |= vectorizeChainsInBlock(BB, R);
}
if (Changed) {
- DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n");
+ R.optimizeGatherSequence();
+ DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");
DEBUG(verifyFunction(F));
}
return Changed;
@@ -114,6 +1827,10 @@ struct SLPVectorizer : public FunctionPass {
AU.addRequired<AliasAnalysis>();
AU.addRequired<TargetTransformInfo>();
AU.addRequired<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreserved<DominatorTree>();
+ AU.setPreservesCFG();
}
private:
@@ -125,29 +1842,149 @@ private:
unsigned collectStores(BasicBlock *BB, BoUpSLP &R);
/// \brief Try to vectorize a chain that starts at two arithmetic instrs.
- bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
+ bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
/// \brief Try to vectorize a list of operands.
+ /// \returns true if a value was vectorized.
bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R);
 /// \brief Try to vectorize a chain that may start at the operands of \p V.
- bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
+ bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
/// \brief Vectorize the stores that were collected in StoreRefs.
bool vectorizeStoreChains(BoUpSLP &R);
- /// \brief Try to hoist gather sequences outside of the loop in cases where
- /// all of the sources are loop invariant.
- void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R);
+ /// \brief Scan the basic block and look for patterns that are likely to start
+ /// a vectorization chain.
+ bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R);
- /// \brief Scan the basic block and look for reductions that may start a
- /// vectorization chain.
- bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R);
+ bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold,
+ BoUpSLP &R);
+ bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold,
+ BoUpSLP &R);
private:
StoreListMap StoreRefs;
};
+/// \brief Check that the Values in the slice of the VL array still exist in
+/// the WeakVH array.
+/// Vectorization of part of the VL array may cause later values in the VL
+/// array to become invalid. We track when this has happened in the WeakVH
+/// array.
+static bool hasValueBeenRAUWed(ArrayRef<Value *> &VL,
+ SmallVectorImpl<WeakVH> &VH,
+ unsigned SliceBegin,
+ unsigned SliceSize) {
+ for (unsigned i = SliceBegin; i < SliceBegin + SliceSize; ++i)
+ if (VH[i] != VL[i])
+ return true;
+
+ return false;
+}
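
A WeakVH nulls out (or retargets) when the value it tracks is deleted or replaced, so comparing each handle against the original pointer exposes slices that an earlier vectorization invalidated. A rough standard-library analogy, with std::weak_ptr playing the role of llvm::WeakVH:

    #include <cstdio>
    #include <memory>
    #include <vector>

    int main() {
      std::vector<std::shared_ptr<int>> VL = {std::make_shared<int>(1),
                                              std::make_shared<int>(2)};
      std::vector<std::weak_ptr<int>> VH(VL.begin(), VL.end()); // snapshot
      VL[1].reset(); // "vectorization deleted" the second value
      for (size_t i = 0; i < VH.size(); ++i)
        std::printf("slot %zu: %s\n", i, VH[i].expired() ? "stale" : "live");
    }
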
+
+bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
+ int CostThreshold, BoUpSLP &R) {
+ unsigned ChainLen = Chain.size();
+ DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
+ << "\n");
+ Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
+ unsigned Sz = DL->getTypeSizeInBits(StoreTy);
+ unsigned VF = MinVecRegSize / Sz;
+
+ if (!isPowerOf2_32(Sz) || VF < 2)
+ return false;
+
+ // Keep track of values that were deleted by vectorizing in the loop below.
+ SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end());
+
+ bool Changed = false;
+ // Look for profitable vectorizable trees at all offsets, starting at zero.
+ for (unsigned i = 0, e = ChainLen; i < e; ++i) {
+ if (i + VF > e)
+ break;
+
+ // Check that a previous iteration of this loop did not delete the Value.
+ if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
+ continue;
+
+ DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
+ << "\n");
+ ArrayRef<Value *> Operands = Chain.slice(i, VF);
+
+ R.buildTree(Operands);
+
+ int Cost = R.getTreeCost();
+
+ DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
+ if (Cost < CostThreshold) {
+ DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
+ R.vectorizeTree();
+
+ // Move to the next bundle.
+ i += VF - 1;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
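
The loop above slides a VF-wide window along the chain and, when the cost model fires, jumps past the consumed bundle (i += VF - 1, plus the loop increment). The indexing in isolation, with a faked cost model:

    #include <cstdio>

    int main() {
      const unsigned ChainLen = 7, VF = 4;
      for (unsigned i = 0; i < ChainLen; ++i) {
        if (i + VF > ChainLen)
          break;                    // not enough stores left for a bundle
        bool Profitable = (i == 1); // pretend the cost model fires at offset 1
        std::printf("try window [%u, %u)\n", i, i + VF);
        if (Profitable) {
          std::printf("  vectorized\n");
          i += VF - 1;              // skip the stores consumed by the bundle
        }
      }
    }
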
+
+bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
+ int costThreshold, BoUpSLP &R) {
+ SetVector<Value *> Heads, Tails;
+ SmallDenseMap<Value *, Value *> ConsecutiveChain;
+
+ // We may run into multiple chains that merge into a single chain. We mark the
+ // stores that we vectorized so that we don't visit the same store twice.
+ BoUpSLP::ValueSet VectorizedStores;
+ bool Changed = false;
+
+ // Do a quadratic search on all of the given stores and find
+ // all of the pairs of stores that follow each other.
+ for (unsigned i = 0, e = Stores.size(); i < e; ++i) {
+ for (unsigned j = 0; j < e; ++j) {
+ if (i == j)
+ continue;
+
+ if (R.isConsecutiveAccess(Stores[i], Stores[j])) {
+ Tails.insert(Stores[j]);
+ Heads.insert(Stores[i]);
+ ConsecutiveChain[Stores[i]] = Stores[j];
+ }
+ }
+ }
+
+ // For stores that start but don't end a link in the chain:
+ for (SetVector<Value *>::iterator it = Heads.begin(), e = Heads.end();
+ it != e; ++it) {
+ if (Tails.count(*it))
+ continue;
+
+ // We found a store instr that starts a chain. Now follow the chain and try
+ // to vectorize it.
+ BoUpSLP::ValueList Operands;
+ Value *I = *it;
+ // Collect the chain into a list.
+ while (Tails.count(I) || Heads.count(I)) {
+ if (VectorizedStores.count(I))
+ break;
+ Operands.push_back(I);
+ // Move to the next value in the chain.
+ I = ConsecutiveChain[I];
+ }
+
+ bool Vectorized = vectorizeStoreChain(Operands, costThreshold, R);
+
+ // Mark the vectorized stores so that we don't vectorize them again.
+ if (Vectorized)
+ VectorizedStores.insert(Operands.begin(), Operands.end());
+ Changed |= Vectorized;
+ }
+
+ return Changed;
+}
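
The pairing pass records, for every store, which store immediately follows it; chain starts are the Heads that are not also Tails. The same discovery, modeled with integers where "consecutive" means "differs by one" (a stand-in for isConsecutiveAccess):

    #include <cstdio>
    #include <map>
    #include <set>
    #include <vector>

    int main() {
      std::vector<int> Stores = {10, 12, 11, 20, 21};
      std::set<int> Heads, Tails;
      std::map<int, int> Next; // the ConsecutiveChain analogue
      for (int A : Stores)
        for (int B : Stores)
          if (B == A + 1) { Heads.insert(A); Tails.insert(B); Next[A] = B; }
      for (int H : Heads) {
        if (Tails.count(H))
          continue; // not the start of a chain
        std::printf("chain:");
        int I = H;
        while (Heads.count(I) || Tails.count(I)) {
          std::printf(" %d", I);
          I = Next[I]; // a missing key default-inserts 0, which ends the walk
        }
        std::printf("\n");
      }
    }
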
+
unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
unsigned count = 0;
StoreRefs.clear();
@@ -156,15 +1993,17 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
if (!SI)
continue;
+ // Don't touch volatile or atomic stores.
+ if (!SI->isSimple())
+ continue;
+
// Check that the pointer points to scalars.
Type *Ty = SI->getValueOperand()->getType();
if (Ty->isAggregateType() || Ty->isVectorTy())
return 0;
- // Find the base of the GEP.
- Value *Ptr = SI->getPointerOperand();
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
- Ptr = GEP->getPointerOperand();
+ // Find the base pointer.
+ Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), DL);
// Save the store locations.
StoreRefs[Ptr].push_back(SI);
@@ -173,34 +2012,83 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
return count;
}
-bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
- if (!A || !B) return false;
+bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
+ if (!A || !B)
+ return false;
Value *VL[] = { A, B };
return tryToVectorizeList(VL, R);
}
bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
- DEBUG(dbgs()<<"SLP: Vectorizing a list of length = " << VL.size() << ".\n");
+ if (VL.size() < 2)
+ return false;
+
+ DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n");
+
+ // Check that all of the parts are scalar instructions of the same type.
+ Instruction *I0 = dyn_cast<Instruction>(VL[0]);
+ if (!I0)
+ return false;
+
+ unsigned Opcode0 = I0->getOpcode();
+
+ Type *Ty0 = I0->getType();
+ unsigned Sz = DL->getTypeSizeInBits(Ty0);
+ unsigned VF = MinVecRegSize / Sz;
- // Check that all of the parts are scalar.
for (int i = 0, e = VL.size(); i < e; ++i) {
Type *Ty = VL[i]->getType();
if (Ty->isAggregateType() || Ty->isVectorTy())
- return 0;
+ return false;
+ Instruction *Inst = dyn_cast<Instruction>(VL[i]);
+ if (!Inst || Inst->getOpcode() != Opcode0)
+ return false;
}
- int Cost = R.getTreeCost(VL);
- int ExtrCost = R.getScalarizationCost(VL);
- DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
- " Cost of extract:" << ExtrCost << ".\n");
- if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
- DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
- R.vectorizeArith(VL);
- return true;
+ bool Changed = false;
+
+ // Keep track of values that were deleted by vectorizing in the loop below.
+ SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
+
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ unsigned OpsWidth = 0;
+
+ if (i + VF > e)
+ OpsWidth = e - i;
+ else
+ OpsWidth = VF;
+
+ if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
+ break;
+
+ // Check that a previous iteration of this loop did not delete the Value.
+ if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth))
+ continue;
+
+ DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
+ << "\n");
+ ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
+
+ R.buildTree(Ops);
+ int Cost = R.getTreeCost();
+
+ if (Cost < -SLPCostThreshold) {
+ DEBUG(dbgs() << "SLP: Vectorizing pair at cost:" << Cost << ".\n");
+ R.vectorizeTree();
+
+ // Move to the next bundle.
+ i += VF - 1;
+ Changed = true;
+ }
+ }
+
+ return Changed;
}
-bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
- if (!V) return false;
+bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
+ if (!V)
+ return false;
+
// Try to vectorize V.
if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
return true;
@@ -237,38 +2125,502 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
return 0;
}
-bool SLPVectorizer::vectorizeReductions(BasicBlock *BB, BoUpSLP &R) {
+/// \brief Generate a shuffle mask to be used in a reduction tree.
+///
+/// \param VecLen The length of the vector to be reduced.
+/// \param NumEltsToRdx The number of elements that should be reduced in the
+/// vector.
+/// \param IsPairwise Whether the reduction is a pairwise or splitting
+/// reduction. A pairwise reduction will generate a mask of
+/// <0,2,...> or <1,3,..> while a splitting reduction will generate
+/// <2,3, undef,undef> for a vector of 4 and NumElts = 2.
+/// \param IsLeft True will generate a mask of even elements, odd otherwise.
+static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
+ bool IsPairwise, bool IsLeft,
+ IRBuilder<> &Builder) {
+ assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask");
+
+ SmallVector<Constant *, 32> ShuffleMask(
+ VecLen, UndefValue::get(Builder.getInt32Ty()));
+
+ if (IsPairwise)
+ // Build a mask of 0, 2, ... (left) or 1, 3, ... (right).
+ for (unsigned i = 0; i != NumEltsToRdx; ++i)
+ ShuffleMask[i] = Builder.getInt32(2 * i + !IsLeft);
+ else
+ // Move the upper half of the vector to the lower half.
+ for (unsigned i = 0; i != NumEltsToRdx; ++i)
+ ShuffleMask[i] = Builder.getInt32(NumEltsToRdx + i);
+
+ return ConstantVector::get(ShuffleMask);
+}
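
For an 8-wide vector reducing 4 elements, the helper yields <0,2,4,6,undef,...> as a pairwise-left mask and <4,5,6,7,undef,...> as a splitting mask. The same index patterns computed with plain integers, -1 marking undef lanes:

    #include <cstdio>
    #include <vector>

    std::vector<int> rdxMask(unsigned VecLen, unsigned NumEltsToRdx,
                             bool IsPairwise, bool IsLeft) {
      std::vector<int> Mask(VecLen, -1); // -1 == undef lane
      for (unsigned i = 0; i != NumEltsToRdx; ++i)
        Mask[i] = IsPairwise ? int(2 * i + !IsLeft) : int(NumEltsToRdx + i);
      return Mask;
    }

    int main() {
      for (bool Pairwise : {true, false}) {
        // Splitting masks require IsLeft == false (see the assert above).
        std::vector<int> M = rdxMask(8, 4, Pairwise, /*IsLeft=*/Pairwise);
        std::printf(Pairwise ? "pairwise-left:" : "splitting:   ");
        for (int v : M)
          std::printf(" %d", v);
        std::printf("\n");
      }
    }
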
+
+/// Model horizontal reductions.
+///
+/// A horizontal reduction is a tree of reduction operations (currently add and
+/// fadd) that has operations that can be put into a vector as its leaf.
+/// For example, this tree:
+///
+/// mul mul mul mul
+/// \ / \ /
+/// + +
+/// \ /
+/// +
+/// This tree has "mul" as its reduced values and "+" as its reduction
+/// operations. A reduction might be feeding into a store or a binary operation
+/// feeding a phi.
+/// ...
+/// \ /
+/// +
+/// |
+/// phi +=
+///
+/// Or:
+/// ...
+/// \ /
+/// +
+/// |
+/// *p =
+///
+class HorizontalReduction {
+ SmallPtrSet<Value *, 16> ReductionOps;
+ SmallVector<Value *, 32> ReducedVals;
+
+ BinaryOperator *ReductionRoot;
+ PHINode *ReductionPHI;
+
+ /// The opcode of the reduction.
+ unsigned ReductionOpcode;
+ /// The opcode of the values we perform a reduction on.
+ unsigned ReducedValueOpcode;
+ /// The width of one full horizontal reduction operation.
+ unsigned ReduxWidth;
+ /// Whether to model this reduction as a pairwise reduction tree or as a
+ /// tree that splits the vector in halves and adds those halves.
+ bool IsPairwiseReduction;
+
+public:
+ HorizontalReduction()
+ : ReductionRoot(0), ReductionPHI(0), ReductionOpcode(0),
+ ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
+
+ /// \brief Try to find a reduction tree.
+ bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B,
+ DataLayout *DL) {
+ assert((!Phi ||
+ std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
+ "Thi phi needs to use the binary operator");
+
+ // We could have an initial reduction that is not an add.
+ // r *= v1 + v2 + v3 + v4
+ // In such a case start looking for a tree rooted in the first '+'.
+ if (Phi) {
+ if (B->getOperand(0) == Phi) {
+ Phi = 0;
+ B = dyn_cast<BinaryOperator>(B->getOperand(1));
+ } else if (B->getOperand(1) == Phi) {
+ Phi = 0;
+ B = dyn_cast<BinaryOperator>(B->getOperand(0));
+ }
+ }
+
+ if (!B)
+ return false;
+
+ Type *Ty = B->getType();
+ if (Ty->isVectorTy())
+ return false;
+
+ ReductionOpcode = B->getOpcode();
+ ReducedValueOpcode = 0;
+ ReduxWidth = MinVecRegSize / DL->getTypeSizeInBits(Ty);
+ ReductionRoot = B;
+ ReductionPHI = Phi;
+
+ if (ReduxWidth < 4)
+ return false;
+
+ // We currently only support adds.
+ if (ReductionOpcode != Instruction::Add &&
+ ReductionOpcode != Instruction::FAdd)
+ return false;
+
+ // Post-order traverse the reduction tree starting at B. We only handle true
+ // trees containing only binary operators.
+ SmallVector<std::pair<BinaryOperator *, unsigned>, 32> Stack;
+ Stack.push_back(std::make_pair(B, 0));
+ while (!Stack.empty()) {
+ BinaryOperator *TreeN = Stack.back().first;
+ unsigned EdgeToVisit = Stack.back().second++;
+ bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;
+
+ // Only handle trees in the current basic block.
+ if (TreeN->getParent() != B->getParent())
+ return false;
+
+ // Each tree node needs to have one user except for the ultimate
+ // reduction.
+ if (!TreeN->hasOneUse() && TreeN != B)
+ return false;
+
+ // Post-order visit.
+ if (EdgeToVisit == 2 || IsReducedValue) {
+ if (IsReducedValue) {
+ // Make sure that the opcodes of the operations that we are going to
+ // reduce match.
+ if (!ReducedValueOpcode)
+ ReducedValueOpcode = TreeN->getOpcode();
+ else if (ReducedValueOpcode != TreeN->getOpcode())
+ return false;
+ ReducedVals.push_back(TreeN);
+ } else {
+ // We need to be able to reassociate the adds.
+ if (!TreeN->isAssociative())
+ return false;
+ ReductionOps.insert(TreeN);
+ }
+ // Retract.
+ Stack.pop_back();
+ continue;
+ }
+
+ // Visit left or right.
+ Value *NextV = TreeN->getOperand(EdgeToVisit);
+ BinaryOperator *Next = dyn_cast<BinaryOperator>(NextV);
+ if (Next)
+ Stack.push_back(std::make_pair(Next, 0));
+ else if (NextV != Phi)
+ return false;
+ }
+ return true;
+ }
+
+ /// \brief Attempt to vectorize the tree found by
+ /// matchAssociativeReduction.
+ bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
+ if (ReducedVals.empty())
+ return false;
+
+ unsigned NumReducedVals = ReducedVals.size();
+ if (NumReducedVals < ReduxWidth)
+ return false;
+
+ Value *VectorizedTree = 0;
+ IRBuilder<> Builder(ReductionRoot);
+ FastMathFlags Unsafe;
+ Unsafe.setUnsafeAlgebra();
+ Builder.SetFastMathFlags(Unsafe);
+ unsigned i = 0;
+
+ for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
+ ArrayRef<Value *> ValsToReduce(&ReducedVals[i], ReduxWidth);
+ V.buildTree(ValsToReduce, &ReductionOps);
+
+ // Estimate cost.
+ int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
+ if (Cost >= -SLPCostThreshold)
+ break;
+
+ DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost
+ << ". (HorRdx)\n");
+
+ // Vectorize a tree.
+ DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
+ Value *VectorizedRoot = V.vectorizeTree();
+
+ // Emit a reduction.
+ Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder);
+ if (VectorizedTree) {
+ Builder.SetCurrentDebugLocation(Loc);
+ VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
+ ReducedSubTree, "bin.rdx");
+ } else
+ VectorizedTree = ReducedSubTree;
+ }
+
+ if (VectorizedTree) {
+ // Finish the reduction.
+ for (; i < NumReducedVals; ++i) {
+ Builder.SetCurrentDebugLocation(
+ cast<Instruction>(ReducedVals[i])->getDebugLoc());
+ VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
+ ReducedVals[i]);
+ }
+ // Update users.
+ if (ReductionPHI) {
+ assert(ReductionRoot != NULL && "Need a reduction operation");
+ ReductionRoot->setOperand(0, VectorizedTree);
+ ReductionRoot->setOperand(1, ReductionPHI);
+ } else
+ ReductionRoot->replaceAllUsesWith(VectorizedTree);
+ }
+ return VectorizedTree != 0;
+ }
+
+private:
+
+ /// \brief Calculate the cost of a reduction.
+ int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) {
+ Type *ScalarTy = FirstReducedVal->getType();
+ Type *VecTy = VectorType::get(ScalarTy, ReduxWidth);
+
+ int PairwiseRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, true);
+ int SplittingRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, false);
+
+ IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost;
+ int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost;
+
+ int ScalarReduxCost =
+ ReduxWidth * TTI->getArithmeticInstrCost(ReductionOpcode, VecTy);
+
+ DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost
+ << " for reduction that starts with " << *FirstReducedVal
+ << " (It is a "
+ << (IsPairwiseReduction ? "pairwise" : "splitting")
+ << " reduction)\n");
+
+ return VecReduxCost - ScalarReduxCost;
+ }
+
+ static Value *createBinOp(IRBuilder<> &Builder, unsigned Opcode, Value *L,
+ Value *R, const Twine &Name = "") {
+ if (Opcode == Instruction::FAdd)
+ return Builder.CreateFAdd(L, R, Name);
+ return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, L, R, Name);
+ }
+
+ /// \brief Emit a horizontal reduction of the vectorized value.
+ Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder) {
+ assert(VectorizedValue && "Need to have a vectorized tree node");
+ Instruction *ValToReduce = dyn_cast<Instruction>(VectorizedValue);
+ assert(isPowerOf2_32(ReduxWidth) &&
+ "We only handle power-of-two reductions for now");
+
+ Value *TmpVec = ValToReduce;
+ for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
+ if (IsPairwiseReduction) {
+ Value *LeftMask =
+ createRdxShuffleMask(ReduxWidth, i, true, true, Builder);
+ Value *RightMask =
+ createRdxShuffleMask(ReduxWidth, i, true, false, Builder);
+
+ Value *LeftShuf = Builder.CreateShuffleVector(
+ TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l");
+ Value *RightShuf = Builder.CreateShuffleVector(
+ TmpVec, UndefValue::get(TmpVec->getType()), (RightMask),
+ "rdx.shuf.r");
+ TmpVec = createBinOp(Builder, ReductionOpcode, LeftShuf, RightShuf,
+ "bin.rdx");
+ } else {
+ Value *UpperHalf =
+ createRdxShuffleMask(ReduxWidth, i, false, false, Builder);
+ Value *Shuf = Builder.CreateShuffleVector(
+ TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf");
+ TmpVec = createBinOp(Builder, ReductionOpcode, TmpVec, Shuf, "bin.rdx");
+ }
+ }
+
+ // The result is in the first element of the vector.
+ return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+ }
+};
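
emitReduction's splitting strategy shuffles the upper half of the vector onto the lower half and adds, halving the width each step until lane 0 holds the result. The same arithmetic in scalar C++, without the shuffles:

    #include <cstdio>

    int main() {
      float V[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      for (unsigned Width = 8 / 2; Width != 0; Width /= 2)
        for (unsigned i = 0; i != Width; ++i)
          V[i] += V[i + Width]; // lane i <- lane i + lane (i + Width)
      std::printf("reduced = %g\n", V[0]); // prints 36
    }
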
+
+/// \brief Recognize construction of vectors like
+/// %ra = insertelement <4 x float> undef, float %s0, i32 0
+/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
+/// %rc = insertelement <4 x float> %rb, float %s2, i32 2
+/// %rd = insertelement <4 x float> %rc, float %s3, i32 3
+///
+/// \returns true if it matches.
+///
+static bool findBuildVector(InsertElementInst *IE,
+ SmallVectorImpl<Value *> &Ops) {
+ if (!isa<UndefValue>(IE->getOperand(0)))
+ return false;
+
+ while (true) {
+ Ops.push_back(IE->getOperand(1));
+
+ if (IE->use_empty())
+ return false;
+
+ InsertElementInst *NextUse = dyn_cast<InsertElementInst>(IE->use_back());
+ if (!NextUse)
+ return true;
+
+ // If this isn't the final use, make sure the next insertelement is the
+ // only use. It's OK if the final constructed vector is used multiple times.
+ if (!IE->hasOneUse())
+ return false;
+
+ IE = NextUse;
+ }
+
+ return false;
+}
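
The walk follows a chain of insertelements through their single users, collecting the inserted scalar at each step. A stripped-down model, where Next stands in for use_back() and a null Next ends the chain:

    #include <cstdio>

    struct Insert {
      int Scalar;   // the value this step inserts
      Insert *Next; // the sole user, or null at the end of the chain
    };

    int main() {
      Insert I3 = {30, nullptr}, I2 = {20, &I3}, I1 = {10, &I2};
      for (Insert *IE = &I1; IE; IE = IE->Next)
        std::printf("collected scalar %d\n", IE->Scalar);
    }
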
+
+static bool PhiTypeSorterFunc(Value *V, Value *V2) {
+ return V->getType() < V2->getType();
+}
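
Sorting PHIs by their Type pointer lets the caller hand each run of same-typed candidates to tryToVectorizeList in one batch. The sort-then-scan-runs idiom by itself, with integers standing in for llvm::Type* keys (C++14 for the generic lambda):

    #include <algorithm>
    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::pair<int, char>> Phis = {
          {2, 'a'}, {1, 'b'}, {2, 'c'}, {1, 'd'}};
      std::stable_sort(
          Phis.begin(), Phis.end(),
          [](const auto &L, const auto &R) { return L.first < R.first; });
      for (auto It = Phis.begin(); It != Phis.end();) {
        auto Run = It;
        while (Run != Phis.end() && Run->first == It->first)
          ++Run;                    // extend the run of equal type keys
        std::printf("type %d: %zu candidates\n", It->first, size_t(Run - It));
        It = Run;                   // continue with the next type
      }
    }
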
+
+bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- if (isa<DbgInfoIntrinsic>(it)) continue;
+ SmallVector<Value *, 4> Incoming;
+ SmallSet<Value *, 16> VisitedInstrs;
+
+ bool HaveVectorizedPhiNodes = true;
+ while (HaveVectorizedPhiNodes) {
+ HaveVectorizedPhiNodes = false;
+
+ // Collect the incoming values from the PHIs.
+ Incoming.clear();
+ for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie;
+ ++instr) {
+ PHINode *P = dyn_cast<PHINode>(instr);
+ if (!P)
+ break;
+
+ if (!VisitedInstrs.count(P))
+ Incoming.push_back(P);
+ }
+
+ // Sort by type.
+ std::stable_sort(Incoming.begin(), Incoming.end(), PhiTypeSorterFunc);
+
+ // Try to vectorize elements based on their type.
+ for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),
+ E = Incoming.end();
+ IncIt != E;) {
+
+ // Look for the next elements with the same type.
+ SmallVector<Value *, 4>::iterator SameTypeIt = IncIt;
+ while (SameTypeIt != E &&
+ (*SameTypeIt)->getType() == (*IncIt)->getType()) {
+ VisitedInstrs.insert(*SameTypeIt);
+ ++SameTypeIt;
+ }
+
+ // Try to vectorize them.
+ unsigned NumElts = (SameTypeIt - IncIt);
+ DEBUG(dbgs() << "SLP: Trying to vectorize starting at PHIs ("
+ << NumElts << ")\n");
+ if (NumElts > 1 &&
+ tryToVectorizeList(ArrayRef<Value *>(IncIt, NumElts), R)) {
+ // Success: start over, because instructions might have been changed.
+ HaveVectorizedPhiNodes = true;
+ Changed = true;
+ break;
+ }
+
+ // Start over at the next instruction of a different type (or the end).
+ IncIt = SameTypeIt;
+ }
+ }
+
+ VisitedInstrs.clear();
+
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
+ // We may go through BB multiple times so skip the one we have checked.
+ if (!VisitedInstrs.insert(it))
+ continue;
+
+ if (isa<DbgInfoIntrinsic>(it))
+ continue;
// Try to vectorize reductions that use PHINodes.
if (PHINode *P = dyn_cast<PHINode>(it)) {
// Check that the PHI is a reduction PHI.
- if (P->getNumIncomingValues() != 2) return Changed;
- Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) :
- (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) :
- 0));
+ if (P->getNumIncomingValues() != 2)
+ return Changed;
+ Value *Rdx =
+ (P->getIncomingBlock(0) == BB
+ ? (P->getIncomingValue(0))
+ : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : 0));
// Check if this is a Binary Operator.
BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
if (!BI)
continue;
- Value *Inst = BI->getOperand(0);
- if (Inst == P) Inst = BI->getOperand(1);
- Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
+ // Try to match and vectorize a horizontal reduction.
+ HorizontalReduction HorRdx;
+ if (ShouldVectorizeHor &&
+ HorRdx.matchAssociativeReduction(P, BI, DL) &&
+ HorRdx.tryToReduce(R, TTI)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
+
+ Value *Inst = BI->getOperand(0);
+ if (Inst == P)
+ Inst = BI->getOperand(1);
+
+ if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {
+ // We would like to start over since some instructions are deleted
+ // and the iterator may become invalid.
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
+
continue;
}
+ // Try to vectorize horizontal reductions feeding into a store.
+ if (ShouldStartVectorizeHorAtStore)
+ if (StoreInst *SI = dyn_cast<StoreInst>(it))
+ if (BinaryOperator *BinOp =
+ dyn_cast<BinaryOperator>(SI->getValueOperand())) {
+ HorizontalReduction HorRdx;
+ if (((HorRdx.matchAssociativeReduction(0, BinOp, DL) &&
+ HorRdx.tryToReduce(R, TTI)) ||
+ tryToVectorize(BinOp, R))) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
+ }
+
// Try to vectorize trees that start at compare instructions.
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
- Changed |= true;
+ Changed = true;
+ // We would like to start over since some instructions are deleted
+ // and the iterator may become invalid.
+ it = BB->begin();
+ e = BB->end();
continue;
}
- for (int i = 0; i < 2; ++i)
- if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i)))
- Changed |= tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R);
+
+ for (int i = 0; i < 2; ++i) {
+ if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
+ if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
+ Changed = true;
+ // We would like to start over since some instructions are deleted
+ // and the iterator may become invalid.
+ it = BB->begin();
+ e = BB->end();
+ }
+ }
+ }
+ continue;
+ }
+
+ // Try to vectorize trees that start at insertelement instructions.
+ if (InsertElementInst *IE = dyn_cast<InsertElementInst>(it)) {
+ SmallVector<Value *, 8> Ops;
+ if (!findBuildVector(IE, Ops))
+ continue;
+
+ if (tryToVectorizeList(Ops, R)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ }
+
continue;
}
}
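
Each successful transform above resets it to BB->begin() because vectorization erases instructions and can invalidate the iterator; the VisitedInstrs set keeps the rescans from reprocessing survivors. The restart pattern in miniature over a std::list:

    #include <cstdio>
    #include <list>

    int main() {
      std::list<int> BB = {1, 2, 3, 4};
      for (std::list<int>::iterator it = BB.begin(), e = BB.end(); it != e;
           ++it) {
        if (*it % 2 == 0) { // "vectorized": the element goes away
          BB.erase(it);
          it = BB.begin();  // the old iterator is dead; start over
          e = BB.end();
          std::printf("restarted, %zu left\n", BB.size());
        }
      }
    }
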
@@ -284,51 +2636,19 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
if (it->second.size() < 2)
continue;
- DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<
- it->second.size() << ".\n");
+ DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
+ << it->second.size() << ".\n");
- Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);
+ // Process the stores in chunks of 16.
+ for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI += 16) {
+ unsigned Len = std::min<unsigned>(CE - CI, 16);
+ ArrayRef<StoreInst *> Chunk(&it->second[CI], Len);
+ Changed |= vectorizeStores(Chunk, -SLPCostThreshold, R);
+ }
}
return Changed;
}
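
Capping each vectorizeStores call at 16 stores bounds the quadratic pairing scan to at most 256 comparisons per chunk. The window arithmetic alone, for a 37-store list:

    #include <algorithm>
    #include <cstdio>

    int main() {
      const unsigned CE = 37;
      for (unsigned CI = 0; CI < CE; CI += 16) {
        unsigned Len = std::min(CE - CI, 16u);
        std::printf("chunk [%u, %u)\n", CI, CI + Len);
      }
    }
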
-void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
- BoUpSLP &R) {
- // Check if this block is inside a loop.
- Loop *L = LI->getLoopFor(BB);
- if (!L)
- return;
-
- // Check if it has a preheader.
- BasicBlock *PreHeader = L->getLoopPreheader();
- if (!PreHeader)
- return;
-
- // Mark the insertion point for the block.
- Instruction *Location = PreHeader->getTerminator();
-
- BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions();
- for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end();
- it != e; ++it) {
- InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it);
-
- // The InsertElement sequence can be simplified into a constant.
- if (!Insert)
- continue;
-
- // If the vector or the element that we insert into it are
- // instructions that are defined in this basic block then we can't
- // hoist this instruction.
- Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
- Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
- if (CurrVec && L->contains(CurrVec)) continue;
- if (NewElem && L->contains(NewElem)) continue;
-
- // We can hoist this instruction. Move it to the pre-header.
- Insert->moveBefore(Location);
- }
-}
-
} // end anonymous namespace
char SLPVectorizer::ID = 0;
@@ -341,8 +2661,5 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
namespace llvm {
- Pass *createSLPVectorizerPass() {
- return new SLPVectorizer();
- }
+Pass *createSLPVectorizerPass() { return new SLPVectorizer(); }
}
-
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp
deleted file mode 100644
index 9b94366..0000000
--- a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp
+++ /dev/null
@@ -1,730 +0,0 @@
-//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "SLP"
-
-#include "VecUtils.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include <algorithm>
-#include <map>
-
-using namespace llvm;
-
-static const unsigned MinVecRegSize = 128;
-
-static const unsigned RecursionMaxDepth = 6;
-
-namespace llvm {
-
-BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
- TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
- BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
- numberInstructions();
-}
-
-void BoUpSLP::numberInstructions() {
- int Loc = 0;
- InstrIdx.clear();
- InstrVec.clear();
- // Number the instructions in the block.
- for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
- InstrIdx[it] = Loc++;
- InstrVec.push_back(it);
- assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
- }
-}
-
-Value *BoUpSLP::getPointerOperand(Value *I) {
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
- return 0;
-}
-
-unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
- if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
- if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
- return -1;
-}
-
-bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
- Value *PtrA = getPointerOperand(A);
- Value *PtrB = getPointerOperand(B);
- unsigned ASA = getAddressSpaceOperand(A);
- unsigned ASB = getAddressSpaceOperand(B);
-
- // Check that the address spaces match and that the pointers are valid.
- if (!PtrA || !PtrB || (ASA != ASB)) return false;
-
- // Check that A and B are of the same type.
- if (PtrA->getType() != PtrB->getType()) return false;
-
- // Calculate the distance.
- const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
- const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
- const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
- const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
-
- // Non constant distance.
- if (!ConstOffSCEV) return false;
-
- int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
- Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
- // The Instructions are connsecutive if the size of the first load/store is
- // the same as the offset.
- int64_t Sz = DL->getTypeStoreSize(Ty);
- return ((-Offset) == Sz);
-}
-
-bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
- Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
- unsigned Sz = DL->getTypeSizeInBits(StoreTy);
- unsigned VF = MinVecRegSize / Sz;
-
- if (!isPowerOf2_32(Sz) || VF < 2) return false;
-
- bool Changed = false;
- // Look for profitable vectorizable trees at all offsets, starting at zero.
- for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
- if (i + VF > e) return Changed;
- DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n");
- ArrayRef<Value *> Operands = Chain.slice(i, VF);
-
- int Cost = getTreeCost(Operands);
- DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
- if (Cost < CostThreshold) {
- DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
- vectorizeTree(Operands, VF);
- i += VF - 1;
- Changed = true;
- }
- }
-
- return Changed;
-}
-
-bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
- ValueSet Heads, Tails;
- SmallDenseMap<Value*, Value*> ConsecutiveChain;
-
- // We may run into multiple chains that merge into a single chain. We mark the
- // stores that we vectorized so that we don't visit the same store twice.
- ValueSet VectorizedStores;
- bool Changed = false;
-
- // Do a quadratic search on all of the given stores and find
- // all of the pairs of loads that follow each other.
- for (unsigned i = 0, e = Stores.size(); i < e; ++i)
- for (unsigned j = 0; j < e; ++j) {
- if (i == j) continue;
- if (isConsecutiveAccess(Stores[i], Stores[j])) {
- Tails.insert(Stores[j]);
- Heads.insert(Stores[i]);
- ConsecutiveChain[Stores[i]] = Stores[j];
- }
- }
-
- // For stores that start but don't end a link in the chain:
- for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
- if (Tails.count(*it)) continue;
-
- // We found a store instr that starts a chain. Now follow the chain and try
- // to vectorize it.
- ValueList Operands;
- Value *I = *it;
- // Collect the chain into a list.
- while (Tails.count(I) || Heads.count(I)) {
- if (VectorizedStores.count(I)) break;
- Operands.push_back(I);
- // Move to the next value in the chain.
- I = ConsecutiveChain[I];
- }
-
- bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
-
- // Mark the vectorized stores so that we don't vectorize them again.
- if (Vectorized)
- VectorizedStores.insert(Operands.begin(), Operands.end());
- Changed |= Vectorized;
- }
-
- return Changed;
-}
-
-int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) {
- // Find the type of the operands in VL.
- Type *ScalarTy = VL[0]->getType();
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
- ScalarTy = SI->getValueOperand()->getType();
- VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
- // Find the cost of inserting/extracting values from the vector.
- return getScalarizationCost(VecTy);
-}
-
-int BoUpSLP::getScalarizationCost(Type *Ty) {
- int Cost = 0;
- for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
- Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
- return Cost;
-}
-
-AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
- return AliasAnalysis::Location();
-}
-
-Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
- assert(Src->getParent() == Dst->getParent() && "Not the same BB");
- BasicBlock::iterator I = Src, E = Dst;
- /// Scan all of the instruction from SRC to DST and check if
- /// the source may alias.
- for (++I; I != E; ++I) {
- // Ignore store instructions that are marked as 'ignore'.
- if (MemBarrierIgnoreList.count(I)) continue;
- if (Src->mayWriteToMemory()) /* Write */ {
- if (!I->mayReadOrWriteMemory()) continue;
- } else /* Read */ {
- if (!I->mayWriteToMemory()) continue;
- }
- AliasAnalysis::Location A = getLocation(&*I);
- AliasAnalysis::Location B = getLocation(Src);
-
- if (!A.Ptr || !B.Ptr || AA->alias(A, B))
- return I;
- }
- return 0;
-}
-
-void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
- Value *Vec = vectorizeTree(Operands, Operands.size());
- BasicBlock::iterator Loc = cast<Instruction>(Vec);
- IRBuilder<> Builder(++Loc);
- // After vectorizing the operands we need to generate extractelement
- // instructions and replace all of the uses of the scalar values with
- // the values that we extracted from the vectorized tree.
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
- Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
- Operands[i]->replaceAllUsesWith(S);
- }
-}
-
-int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
- // Get rid of the list of stores that were removed, and from the
- // lists of instructions with multiple users.
- MemBarrierIgnoreList.clear();
- LaneMap.clear();
- MultiUserVals.clear();
- MustScalarize.clear();
-
- // Scan the tree and find which value is used by which lane, and which values
- // must be scalarized.
- getTreeUses_rec(VL, 0);
-
- // Check that instructions with multiple users can be vectorized. Mark unsafe
- // instructions.
- for (ValueSet::iterator it = MultiUserVals.begin(),
- e = MultiUserVals.end(); it != e; ++it) {
- // Check that all of the users of this instr are within the tree
- // and that they are all from the same lane.
- int Lane = -1;
- for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
- I != E; ++I) {
- if (LaneMap.find(*I) == LaneMap.end()) {
- MustScalarize.insert(*it);
- DEBUG(dbgs()<<"SLP: Adding " << **it <<
- " to MustScalarize because of an out of tree usage.\n");
- break;
- }
- if (Lane == -1) Lane = LaneMap[*I];
- if (Lane != LaneMap[*I]) {
- MustScalarize.insert(*it);
- DEBUG(dbgs()<<"Adding " << **it <<
- " to MustScalarize because multiple lane use it: "
- << Lane << " and " << LaneMap[*I] << ".\n");
- break;
- }
- }
- }
-
- // Now calculate the cost of vectorizing the tree.
- return getTreeCost_rec(VL, 0);
-}
-
-void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
- if (Depth == RecursionMaxDepth) return;
-
- // Don't handle vectors.
- if (VL[0]->getType()->isVectorTy()) return;
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
- if (SI->getValueOperand()->getType()->isVectorTy()) return;
-
- // Check if all of the operands are constants.
- bool AllConst = true;
- bool AllSameScalar = true;
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- AllConst &= isa<Constant>(VL[i]);
- AllSameScalar &= (VL[0] == VL[i]);
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- // If one of the instructions is out of this BB, we need to scalarize all.
- if (I && I->getParent() != BB) return;
- }
-
- // If all of the operands are identical or constant we have a simple solution.
- if (AllConst || AllSameScalar) return;
-
- // Scalarize unknown structures.
- Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
- if (!VL0) return;
-
- unsigned Opcode = VL0->getOpcode();
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- // If not all of the instructions are identical then we have to scalarize.
- if (!I || Opcode != I->getOpcode()) return;
- }
-
- // Mark instructions with multiple users.
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- // Remember to check if all of the users of this instr are vectorized
- // within our tree.
- if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
- }
-
- for (int i = 0, e = VL.size(); i < e; ++i) {
- // Check that the instruction is only used within
- // one lane.
- if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
- // Make this instruction as 'seen' and remember the lane.
- LaneMap[VL[i]] = i;
- }
-
- switch (Opcode) {
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast:
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
- ValueList Operands;
- // Prepare the operand vector.
- for (unsigned j = 0; j < VL.size(); ++j)
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
-
- getTreeUses_rec(Operands, Depth+1);
- }
- return;
- }
- case Instruction::Store: {
- ValueList Operands;
- for (unsigned j = 0; j < VL.size(); ++j)
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
- getTreeUses_rec(Operands, Depth+1);
- return;
- }
- default:
- return;
- }
-}
-
-int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
- Type *ScalarTy = VL[0]->getType();
-
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
- ScalarTy = SI->getValueOperand()->getType();
-
- /// Don't mess with vectors.
- if (ScalarTy->isVectorTy()) return max_cost;
- VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
-
- if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);
-
- // Check if all of the operands are constants.
- bool AllConst = true;
- bool AllSameScalar = true;
- bool MustScalarizeFlag = false;
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- AllConst &= isa<Constant>(VL[i]);
- AllSameScalar &= (VL[0] == VL[i]);
- // Must have a single use.
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- MustScalarizeFlag |= MustScalarize.count(VL[i]);
- // This instruction is outside the basic block.
- if (I && I->getParent() != BB)
- return getScalarizationCost(VecTy);
- }
-
- // Is this a simple vector constant.
- if (AllConst) return 0;
-
- // If all of the operands are identical we can broadcast them.
- Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
- if (AllSameScalar) {
- // If we are in a loop, and this is not an instruction (e.g. constant or
- // argument) or the instruction is defined outside the loop then assume
- // that the cost is zero.
- if (L && (!VL0 || !L->contains(VL0)))
- return 0;
-
- // We need to broadcast the scalar.
- return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
- }
-
- // If this is not a constant, or a scalar from outside the loop then we
- // need to scalarize it.
- if (MustScalarizeFlag)
- return getScalarizationCost(VecTy);
-
- if (!VL0) return getScalarizationCost(VecTy);
- assert(VL0->getParent() == BB && "Wrong BB");
-
- unsigned Opcode = VL0->getOpcode();
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- // If not all of the instructions are identical then we have to scalarize.
- if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
- }
-
- // Check if it is safe to sink the loads or the stores.
- if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
- int MaxIdx = InstrIdx[VL0];
- for (unsigned i = 1, e = VL.size(); i < e; ++i )
- MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
-
- Instruction *Last = InstrVec[MaxIdx];
- for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
- if (VL[i] == Last) continue;
- Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
- if (Barrier) {
- DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
- *Last << "\n because of " << *Barrier << "\n");
- return max_cost;
- }
- }
- }
-
- switch (Opcode) {
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast: {
- int Cost = 0;
- ValueList Operands;
- Type *SrcTy = VL0->getOperand(0)->getType();
- // Prepare the operand vector.
- for (unsigned j = 0; j < VL.size(); ++j) {
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
- // Check that the casted type is the same for all users.
- if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
- return getScalarizationCost(VecTy);
- }
-
- Cost += getTreeCost_rec(Operands, Depth+1);
- if (Cost >= max_cost) return max_cost;
-
- // Calculate the cost of this instruction.
- int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
- VL0->getType(), SrcTy);
-
- VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
- int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
- Cost += (VecCost - ScalarCost);
- return Cost;
- }
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- int Cost = 0;
- // Calculate the cost of all of the operands.
- for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
- ValueList Operands;
- // Prepare the operand vector.
- for (unsigned j = 0; j < VL.size(); ++j)
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
-
- Cost += getTreeCost_rec(Operands, Depth+1);
- if (Cost >= max_cost) return max_cost;
- }
-
- // Calculate the cost of this instruction.
- int ScalarCost = VecTy->getNumElements() *
- TTI->getArithmeticInstrCost(Opcode, ScalarTy);
-
- int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
- Cost += (VecCost - ScalarCost);
- return Cost;
- }
- case Instruction::Load: {
- // If we are scalarize the loads, add the cost of forming the vector.
- for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
- if (!isConsecutiveAccess(VL[i], VL[i+1]))
- return getScalarizationCost(VecTy);
-
- // Cost of wide load - cost of scalar loads.
- int ScalarLdCost = VecTy->getNumElements() *
- TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
- int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
- return VecLdCost - ScalarLdCost;
- }
- case Instruction::Store: {
- // We know that we can merge the stores. Calculate the cost.
- int ScalarStCost = VecTy->getNumElements() *
- TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
- int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0);
- int StoreCost = VecStCost - ScalarStCost;
-
- ValueList Operands;
- for (unsigned j = 0; j < VL.size(); ++j) {
- Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
- MemBarrierIgnoreList.insert(VL[j]);
- }
-
- int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
- return TotalCost;
- }
- default:
- // Unable to vectorize unknown instructions.
- return getScalarizationCost(VecTy);
- }
-}
-
-Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) {
- int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
- for (unsigned i = 0; i < VF; ++i )
- MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
- return InstrVec[MaxIdx + 1];
-}
-
-Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
- IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
- Value *Vec = UndefValue::get(Ty);
- for (unsigned i=0; i < Ty->getNumElements(); ++i) {
- // Generate the 'InsertElement' instruction.
- Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
- // Remember that this instruction is used as part of a 'gather' sequence.
- // The caller of the bottom-up slp vectorizer can try to hoist the sequence
- // if the users are outside of the basic block.
- GatherInstructions.push_back(Vec);
- }
-
- return Vec;
-}
-
-Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
- Value *V = vectorizeTree_rec(VL, VF);
- // We moved some instructions around. We have to number them again
- // before we can do any analysis.
- numberInstructions();
- MustScalarize.clear();
- return V;
-}
-
-Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
- Type *ScalarTy = VL[0]->getType();
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
- ScalarTy = SI->getValueOperand()->getType();
- VectorType *VecTy = VectorType::get(ScalarTy, VF);
-
- // Check if all of the operands are constants or identical.
- bool AllConst = true;
- bool AllSameScalar = true;
- for (unsigned i = 0, e = VF; i < e; ++i) {
- AllConst &= isa<Constant>(VL[i]);
- AllSameScalar &= (VL[0] == VL[i]);
- // The instruction must be in the same BB, and it must be vectorizable.
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
- return Scalarize(VL, VecTy);
- }
-
- // Check that this is a simple vector constant.
- if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
-
- // Scalarize unknown structures.
- Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
- if (!VL0) return Scalarize(VL, VecTy);
-
- if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];
-
- unsigned Opcode = VL0->getOpcode();
- for (unsigned i = 0, e = VF; i < e; ++i) {
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- // If not all of the instructions are identical then we have to scalarize.
- if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
- }
-
- switch (Opcode) {
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast: {
- ValueList INVL;
- for (int i = 0; i < VF; ++i)
- INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
- Value *InVec = vectorizeTree_rec(INVL, VF);
- IRBuilder<> Builder(GetLastInstr(VL, VF));
- CastInst *CI = dyn_cast<CastInst>(VL0);
- Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
- VectorizedValues[VL0] = V;
- return V;
- }
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- ValueList LHSVL, RHSVL;
- for (int i = 0; i < VF; ++i) {
- RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
- LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
- }
-
- Value *RHS = vectorizeTree_rec(RHSVL, VF);
- Value *LHS = vectorizeTree_rec(LHSVL, VF);
- IRBuilder<> Builder(GetLastInstr(VL, VF));
- BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
- Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
- VectorizedValues[VL0] = V;
- return V;
- }
- case Instruction::Load: {
- LoadInst *LI = cast<LoadInst>(VL0);
- unsigned Alignment = LI->getAlignment();
-
- // Check if all of the loads are consecutive.
- for (unsigned i = 1, e = VF; i < e; ++i)
- if (!isConsecutiveAccess(VL[i-1], VL[i]))
- return Scalarize(VL, VecTy);
-
- IRBuilder<> Builder(GetLastInstr(VL, VF));
- Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
- VecTy->getPointerTo());
- LI = Builder.CreateLoad(VecPtr);
- LI->setAlignment(Alignment);
- VectorizedValues[VL0] = LI;
- return LI;
- }
- case Instruction::Store: {
- StoreInst *SI = cast<StoreInst>(VL0);
- unsigned Alignment = SI->getAlignment();
-
- ValueList ValueOp;
- for (int i = 0; i < VF; ++i)
- ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
-
- Value *VecValue = vectorizeTree_rec(ValueOp, VF);
-
- IRBuilder<> Builder(GetLastInstr(VL, VF));
- Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
- VecTy->getPointerTo());
- Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
-
- for (int i = 0; i < VF; ++i)
- cast<Instruction>(VL[i])->eraseFromParent();
- return 0;
- }
- default:
- Value *S = Scalarize(VL, VecTy);
- VectorizedValues[VL0] = S;
- return S;
- }
-}
-
-} // end of namespace
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h
deleted file mode 100644
index 5456c6c..0000000
--- a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h
+++ /dev/null
@@ -1,164 +0,0 @@
-//===- VecUtils.h - Vectorization Utilities -------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This family of classes and functions manipulate vectors and chains of
-// vectors.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
-#define LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include <vector>
-
-namespace llvm {
-
-class BasicBlock; class Instruction; class Type;
-class VectorType; class StoreInst; class Value;
-class ScalarEvolution; class DataLayout;
-class TargetTransformInfo; class AliasAnalysis;
-class Loop;
-
-/// Bottom Up SLP vectorization utility class.
-struct BoUpSLP {
- typedef SmallVector<Value*, 8> ValueList;
- typedef SmallPtrSet<Value*, 16> ValueSet;
- typedef SmallVector<StoreInst*, 8> StoreList;
- static const int max_cost = 1<<20;
-
- // \brief C'tor.
- BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl,
- TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp);
-
- /// \brief Take the pointer operand from the Load/Store instruction.
- /// \returns NULL if this is not a valid Load/Store instruction.
- static Value *getPointerOperand(Value *I);
-
- /// \brief Take the address space operand from the Load/Store instruction.
- /// \returns -1 if this is not a valid Load/Store instruction.
- static unsigned getAddressSpaceOperand(Value *I);
-
- /// \returns true if the memory operations A and B are consecutive.
- bool isConsecutiveAccess(Value *A, Value *B);
-
- /// \brief Vectorize the tree that starts with the elements in \p VL.
- /// \returns the vectorized value.
- Value *vectorizeTree(ArrayRef<Value *> VL, int VF);
-
- /// \returns the vectorization cost of the subtree that starts at \p VL.
- /// A negative number means that this is profitable.
- int getTreeCost(ArrayRef<Value *> VL);
-
- /// \returns the scalarization cost for this list of values. Assuming that
- /// this subtree gets vectorized, we may need to extract the values from the
- /// roots. This method calculates the cost of extracting the values.
- int getScalarizationCost(ArrayRef<Value *> VL);
-
- /// \brief Attempts to order and vectorize a sequence of stores. This
- /// function does a quadratic scan of the given stores.
- /// \returns true if the basic block was modified.
- bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold);
-
- /// \brief Vectorize a group of scalars into a vector tree.
- void vectorizeArith(ArrayRef<Value *> Operands);
-
- /// \returns the list of new instructions that were added in order to collect
- /// scalars into vectors. This list can be used to further optimize the gather
- /// sequences.
- ValueList &getGatherSeqInstructions() { return GatherInstructions; }
-
-private:
- /// \brief This method contains the recursive part of getTreeCost.
- int getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth);
-
- /// \brief This recursive method looks for vectorization hazards such as
- /// values that have multiple users, and checks that each value is used
- /// by only one vector lane. It updates the LaneMap and MultiUserVals
- /// members.
-
- /// \brief This method contains the recursive part of vectorizeTree.
- Value *vectorizeTree_rec(ArrayRef<Value *> VL, int VF);
-
- /// \brief Number all of the instructions in the block.
- void numberInstructions();
-
- /// \brief Vectorize a sorted sequence of stores.
- bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold);
-
- /// \returns the scalarization cost for this type. Scalarization in this
- /// context means the creation of vectors from a group of scalars.
- int getScalarizationCost(Type *Ty);
-
- /// \returns the AA location that is being accessed by the instruction.
- AliasAnalysis::Location getLocation(Instruction *I);
-
- /// \brief Checks if it is possible to sink an instruction from
- /// \p Src to \p Dst.
- /// \returns the pointer to the barrier instruction if we can't sink.
- Value *isUnsafeToSink(Instruction *Src, Instruction *Dst);
-
- /// \returns the instruction from \p VL that appears last in the BB.
- /// Only the first \p VF elements are considered.
- Instruction *GetLastInstr(ArrayRef<Value *> VL, unsigned VF);
-
- /// \returns a vector from a collection of scalars in \p VL.
- Value *Scalarize(ArrayRef<Value *> VL, VectorType *Ty);
-
-private:
- /// Maps instructions to numbers and back.
- SmallDenseMap<Value*, int> InstrIdx;
- /// Maps integers to Instructions.
- std::vector<Instruction*> InstrVec;
-
- // -- containers that are used during getTreeCost -- //
-
- /// Contains values that must be scalarized because they are used
- /// by multiple lanes, or by users outside the tree.
- /// NOTICE: The vectorization methods also use this set.
- ValueSet MustScalarize;
-
- /// Contains a list of values that are used outside the current tree. This
- /// set must be reset between runs.
- ValueSet MultiUserVals;
- /// Maps values in the tree to the vector lanes that use them. This map must
- /// be reset between runs of getCost.
- std::map<Value*, int> LaneMap;
- /// A list of instructions to ignore while sinking
- /// memory instructions. This set must be reset between runs of getCost.
- SmallPtrSet<Value *, 8> MemBarrierIgnoreList;
-
- // -- Containers that are used during vectorizeTree -- //
-
- /// Maps the first scalar of a bundle to its vectorized value. This map must
- /// be reset between runs.
- DenseMap<Value*, Value*> VectorizedValues;
-
- // -- Containers that are used after vectorization by the caller -- //
-
- /// A list of instructions that are used when gathering scalars into vectors.
- /// In many cases these instructions can be hoisted outside of the BB.
- /// Iterating over this list is faster than calling LICM.
- ValueList GatherInstructions;
-
- // Analysis and block reference.
- BasicBlock *BB;
- ScalarEvolution *SE;
- DataLayout *DL;
- TargetTransformInfo *TTI;
- AliasAnalysis *AA;
- Loop *L;
-};
-
-} // end of namespace
-
-#endif // LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
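Since the header above is removed wholesale, a hypothetical driver may help show how its interface fit together. This is a sketch, not the SLPVectorizer code that replaced it: the analyses (BB, SE, DL, TTI, AA, L) and the store list are assumed to come from the enclosing pass, and the threshold value is illustrative.

#include "VecUtils.h" // the deleted header sketched above
using namespace llvm;

static bool trySLP(BasicBlock *BB, ScalarEvolution *SE, DataLayout *DL,
                   TargetTransformInfo *TTI, AliasAnalysis *AA, Loop *L,
                   ArrayRef<StoreInst *> Stores) {
  BoUpSLP R(BB, SE, DL, TTI, AA, L);
  // getTreeCost returns a negative value when vectorization is profitable,
  // so a threshold of 0 accepts any profitable store chain.
  bool Changed = R.vectorizeStores(Stores, /*costThreshold=*/0);
  if (Changed) {
    // Gather sequences emitted while building vectors are collected here so
    // the caller can hoist them directly instead of re-running LICM.
    BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions();
    for (unsigned i = 0, e = Gathers.size(); i != e; ++i)
      (void)Gathers[i]; // e.g. attempt to hoist the gather out of hot code
  }
  return Changed;
}

Passing the analyses through the constructor keeps BoUpSLP a per-block helper owned by a pass, rather than a pass of its own, which is consistent with the reset-between-runs notes on its internal containers.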