Diffstat (limited to 'contrib/llvm/lib/Transforms')
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 64
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp | 47
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp | 8
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp | 107
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp | 63
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 34
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp | 93
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/IPO.cpp | 7
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/Inliner.cpp | 28
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/Internalize.cpp | 38
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp | 26
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 56
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PruneEH.cpp | 10
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombine.h | 14
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 172
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 25
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 70
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 201
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 17
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 30
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 7
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h | 4
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 401
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 437
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp | 105
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/BlackList.h | 57
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp | 16
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/FunctionBlackList.cpp | 79
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/FunctionBlackList.h | 37
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp | 73
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h | 53
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 137
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp | 80
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 5
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DCE.cpp | 17
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 203
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 120
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GVN.cpp | 99
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp | 10
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 42
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp | 8
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LICM.cpp | 45
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 37
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp | 65
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 58
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 9
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 13
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 72
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp | 150
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp | 105
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SCCP.cpp | 10
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SROA.cpp | 3697
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalar.cpp | 3
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp | 198
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 62
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp | 980
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 45
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 154
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp | 262
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp | 14
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp | 420
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LCSSA.cpp | 15
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Local.cpp | 82
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp | 7
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp | 132
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 18
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 1507
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 1149
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Utils.cpp | 1
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp | 1012
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 1941
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp | 8
87 files changed, 12485 insertions(+), 2942 deletions(-)
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index b94dd69..be48b20 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -153,7 +153,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
SmallPtrSet<Argument*, 8> ArgsToPromote;
SmallPtrSet<Argument*, 8> ByValArgsToTransform;
for (unsigned i = 0; i != PointerArgs.size(); ++i) {
- bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);
+ bool isByVal=F->getParamAttributes(PointerArgs[i].second+1).
+ hasAttribute(Attributes::ByVal);
Argument *PtrArg = PointerArgs[i].first;
Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
@@ -517,8 +518,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
const AttrListPtr &PAL = F->getAttributes();
// Add any return attributes.
- if (Attributes attrs = PAL.getRetAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+ Attributes attrs = PAL.getRetAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ attrs));
// First, determine the new argument list
unsigned ArgIndex = 1;
@@ -534,7 +537,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
} else if (!ArgsToPromote.count(I)) {
// Unchanged argument
Params.push_back(I->getType());
- if (Attributes attrs = PAL.getParamAttributes(ArgIndex))
+ Attributes attrs = PAL.getParamAttributes(ArgIndex);
+ if (attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
} else if (I->use_empty()) {
// Dead argument (which are always marked as promotable)
@@ -587,19 +591,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
// Add any function attributes.
- if (Attributes attrs = PAL.getFnAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+ attrs = PAL.getFnAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ attrs));
Type *RetTy = FTy->getReturnType();
- // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
- // have zero fixed arguments.
- bool ExtraArgHack = false;
- if (Params.empty() && FTy->isVarArg()) {
- ExtraArgHack = true;
- Params.push_back(Type::getInt32Ty(F->getContext()));
- }
-
// Construct the new function type using the new arguments.
FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
@@ -613,7 +611,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Recompute the parameter attributes list based on the new arguments for
// the function.
- NF->setAttributes(AttrListPtr::get(AttributesVec));
+ NF->setAttributes(AttrListPtr::get(F->getContext(), AttributesVec));
AttributesVec.clear();
F->getParent()->getFunctionList().insert(F, NF);
@@ -641,8 +639,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
const AttrListPtr &CallPAL = CS.getAttributes();
// Add any return attributes.
- if (Attributes attrs = CallPAL.getRetAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+ Attributes attrs = CallPAL.getRetAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ attrs));
// Loop over the operands, inserting GEP and loads in the caller as
// appropriate.
@@ -653,7 +653,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
Args.push_back(*AI); // Unmodified argument
- if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ Attributes Attrs = CallPAL.getParamAttributes(ArgIndex);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
} else if (ByValArgsToTransform.count(I)) {
@@ -711,30 +712,32 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
}
- if (ExtraArgHack)
- Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));
-
// Push any varargs arguments on the list.
for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
Args.push_back(*AI);
- if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ Attributes Attrs = CallPAL.getParamAttributes(ArgIndex);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
}
// Add any function attributes.
- if (Attributes attrs = CallPAL.getFnAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+ attrs = CallPAL.getFnAttributes();
+ if (attrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ attrs));
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
Args, "", Call);
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
- cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec));
+ cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(II->getContext(),
+ AttributesVec));
} else {
New = CallInst::Create(NF, Args, "", Call);
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
- cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec));
+ cast<CallInst>(New)->setAttributes(AttrListPtr::get(New->getContext(),
+ AttributesVec));
if (cast<CallInst>(Call)->isTailCall())
cast<CallInst>(New)->setTailCall();
}
@@ -870,16 +873,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
// Increment I2 past all of the arguments added for this promoted pointer.
- for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i)
- ++I2;
+ std::advance(I2, ArgIndices.size());
}
- // Notify the alias analysis implementation that we inserted a new argument.
- if (ExtraArgHack)
- AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())),
- NF->arg_begin());
-
-
// Tell the alias analysis that the old function is about to disappear.
AA.replaceWithNewValue(F, NF);
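The hunks above all follow the same migration: the old bit-mask Attributes values give way to the class-based API, magic indices (0, ~0) become AttrListPtr::ReturnIndex / AttrListPtr::FunctionIndex, and AttrListPtr::get now takes the LLVMContext. A minimal sketch of the rebuilt attribute-list flow, using only names that appear in the hunks (F and NF are the old and new functions, as in the pass):

  SmallVector<AttributeWithIndex, 8> AttributesVec;
  const AttrListPtr &PAL = F->getAttributes();

  // Return attributes: tested with hasAttributes(), indexed symbolically.
  Attributes RAttrs = PAL.getRetAttributes();
  if (RAttrs.hasAttributes())
    AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
                                                    RAttrs));

  // Function attributes: FunctionIndex replaces the old ~0 sentinel.
  Attributes FnAttrs = PAL.getFnAttributes();
  if (FnAttrs.hasAttributes())
    AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
                                                    FnAttrs));

  // Building the list now requires the context.
  NF->setAttributes(AttrListPtr::get(F->getContext(), AttributesVec));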
diff --git a/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp b/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp
new file mode 100644
index 0000000..2e32240
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp
@@ -0,0 +1,47 @@
+//===- BarrierNoopPass.cpp - A barrier pass for the pass manager ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// NOTE: DO NOT USE THIS IF AVOIDABLE
+//
+// This pass is a nonce pass intended to allow manipulation of the implicitly
+// nesting pass manager. For example, it can be used to cause a CGSCC pass
+// manager to be closed prior to running a new collection of function passes.
+//
+// FIXME: This is a huge HACK. This should be removed when the pass manager's
+// nesting is made explicit instead of implicit.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+using namespace llvm;
+
+namespace {
+/// \brief A nonce module pass used to place a barrier in a pass manager.
+///
+/// There is no mechanism for ending a CGSCC pass manager once one is started.
+/// This prevents extension points from having clear deterministic ordering
+/// when they are phrased as non-module passes.
+class BarrierNoop : public ModulePass {
+public:
+ static char ID; // Pass identification.
+
+ BarrierNoop() : ModulePass(ID) {
+ initializeBarrierNoopPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) { return false; }
+};
+}
+
+ModulePass *llvm::createBarrierNoopPass() { return new BarrierNoop(); }
+
+char BarrierNoop::ID = 0;
+INITIALIZE_PASS(BarrierNoop, "barrier", "A No-Op Barrier Pass",
+ false, false)
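A hypothetical usage sketch for the new barrier pass (the CGSCC and function passes named here are placeholders, not part of this change): inserting the barrier forces the implicitly nested CGSCC pass manager to close before the next function pass is scheduled.

  PassManager PM;
  PM.add(createSomeCGSCCPass());     // placeholder: any CGSCC pass
  PM.add(createBarrierNoopPass());   // close the implicit CGSCC pass manager
  PM.add(createSomeFunctionPass());  // placeholder: runs in a fresh nested manager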
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index d8fae8a..e2f0126 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -23,7 +23,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -50,7 +50,7 @@ namespace {
// alignment to a concrete value.
unsigned getAlignment(GlobalVariable *GV) const;
- const TargetData *TD;
+ const DataLayout *TD;
};
}
@@ -98,7 +98,7 @@ unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
}
bool ConstantMerge::runOnModule(Module &M) {
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
@@ -107,7 +107,7 @@ bool ConstantMerge::runOnModule(Module &M) {
// Map unique <constants, has-unknown-alignment> pairs to globals. We don't
// want to merge globals of unknown alignment with those of explicit
- // alignment. If we have TargetData, we always know the alignment.
+ // alignment. If we have DataLayout, we always know the alignment.
DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap;
// Replacements - This vector contains a list of replacements to perform.
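This is the first of many files in this diff that switch from TargetData to the renamed DataLayout analysis; the usage pattern is otherwise unchanged. A minimal sketch of the renamed lookup as it appears in runOnModule above:

  #include "llvm/DataLayout.h"   // previously "llvm/Target/TargetData.h"

  // Inside ConstantMerge::runOnModule(): the analysis is still optional.
  TD = getAnalysisIfAvailable<DataLayout>();   // may legitimately be null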
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index fd23a93..4cfd0b2 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -21,7 +21,9 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/CallingConv.h"
#include "llvm/Constant.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/DIBuilder.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
@@ -30,6 +32,7 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -121,6 +124,15 @@ namespace {
typedef SmallVector<RetOrArg, 5> UseVector;
+ // Map each LLVM function to corresponding metadata with debug info. If
+ // the function is replaced with another one, we should patch the pointer
+ // to LLVM function in metadata.
+ // As the code generation for module is finished (and DIBuilder is
+ // finalized) we assume that subprogram descriptors won't be changed, and
+ // they are stored in map for short duration anyway.
+ typedef DenseMap<Function*, DISubprogram> FunctionDIMap;
+ FunctionDIMap FunctionDIs;
+
protected:
// DAH uses this to specify a different ID.
explicit DAE(char &ID) : ModulePass(ID) {}
@@ -141,6 +153,7 @@ namespace {
unsigned RetValNum = 0);
Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
+ void CollectFunctionDIs(Module &M);
void SurveyFunction(const Function &F);
void MarkValue(const RetOrArg &RA, Liveness L,
const UseVector &MaybeLiveUses);
@@ -180,6 +193,33 @@ INITIALIZE_PASS(DAH, "deadarghaX0r",
ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); }
ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
+/// CollectFunctionDIs - Map each function in the module to its debug info
+/// descriptor.
+void DAE::CollectFunctionDIs(Module &M) {
+ FunctionDIs.clear();
+
+ for (Module::named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end(); I != E; ++I) {
+ NamedMDNode &NMD = *I;
+ for (unsigned MDIndex = 0, MDNum = NMD.getNumOperands();
+ MDIndex < MDNum; ++MDIndex) {
+ MDNode *Node = NMD.getOperand(MDIndex);
+ if (!DIDescriptor(Node).isCompileUnit())
+ continue;
+ DICompileUnit CU(Node);
+ const DIArray &SPs = CU.getSubprograms();
+ for (unsigned SPIndex = 0, SPNum = SPs.getNumElements();
+ SPIndex < SPNum; ++SPIndex) {
+ DISubprogram SP(SPs.getElement(SPIndex));
+ if (!SP.Verify())
+ continue;
+ if (Function *F = SP.getFunction())
+ FunctionDIs[F] = SP;
+ }
+ }
+ }
+}
+
/// DeleteDeadVarargs - If this is an function that takes a ... list, and if
/// llvm.vastart is never called, the varargs list is dead for the function.
bool DAE::DeleteDeadVarargs(Function &Fn) {
@@ -236,9 +276,11 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
SmallVector<AttributeWithIndex, 8> AttributesVec;
for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i)
AttributesVec.push_back(PAL.getSlot(i));
- if (Attributes FnAttrs = PAL.getFnAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
- PAL = AttrListPtr::get(AttributesVec);
+ Attributes FnAttrs = PAL.getFnAttributes();
+ if (FnAttrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ FnAttrs));
+ PAL = AttrListPtr::get(Fn.getContext(), AttributesVec);
}
Instruction *New;
@@ -284,6 +326,11 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
I2->takeName(I);
}
+ // Patch the pointer to LLVM function in debug info descriptor.
+ FunctionDIMap::iterator DI = FunctionDIs.find(&Fn);
+ if (DI != FunctionDIs.end())
+ DI->second.replaceFunction(NF);
+
// Finally, nuke the old function.
Fn.eraseFromParent();
return true;
@@ -717,13 +764,17 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
if (NRetTy->isVoidTy())
- RAttrs &= ~Attribute::typeIncompatible(NRetTy);
+ RAttrs =
+ Attributes::get(NRetTy->getContext(), AttrBuilder(RAttrs).
+ removeAttributes(Attributes::typeIncompatible(NRetTy)));
else
- assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0
- && "Return attributes no longer compatible?");
+ assert(!AttrBuilder(RAttrs).
+ hasAttributes(Attributes::typeIncompatible(NRetTy)) &&
+ "Return attributes no longer compatible?");
- if (RAttrs)
- AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+ if (RAttrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ RAttrs));
// Remember which arguments are still alive.
SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
@@ -740,7 +791,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Get the original parameter attributes (skipping the first one, that is
// for the return value.
- if (Attributes Attrs = PAL.getParamAttributes(i + 1))
+ Attributes Attrs = PAL.getParamAttributes(i + 1);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));
} else {
++NumArgumentsEliminated;
@@ -749,11 +801,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
}
}
- if (FnAttrs != Attribute::None)
- AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ if (FnAttrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ FnAttrs));
// Reconstruct the AttributesList based on the vector we constructed.
- AttrListPtr NewPAL = AttrListPtr::get(AttributesVec);
+ AttrListPtr NewPAL = AttrListPtr::get(F->getContext(), AttributesVec);
// Create the new function type based on the recomputed parameters.
FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
@@ -786,9 +839,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
Attributes RAttrs = CallPAL.getRetAttributes();
Attributes FnAttrs = CallPAL.getFnAttributes();
// Adjust in case the function was changed to return void.
- RAttrs &= ~Attribute::typeIncompatible(NF->getReturnType());
- if (RAttrs)
- AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+ RAttrs =
+ Attributes::get(NF->getContext(), AttrBuilder(RAttrs).
+ removeAttributes(Attributes::typeIncompatible(NF->getReturnType())));
+ if (RAttrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ RAttrs));
// Declare these outside of the loops, so we can reuse them for the second
// loop, which loops the varargs.
@@ -800,22 +856,25 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (ArgAlive[i]) {
Args.push_back(*I);
// Get original parameter attributes, but skip return attributes.
- if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+ Attributes Attrs = CallPAL.getParamAttributes(i + 1);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
}
// Push any varargs arguments on the list. Don't forget their attributes.
for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
Args.push_back(*I);
- if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+ Attributes Attrs = CallPAL.getParamAttributes(i + 1);
+ if (Attrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
}
- if (FnAttrs != Attribute::None)
- AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ if (FnAttrs.hasAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ FnAttrs));
// Reconstruct the AttributesList based on the vector we constructed.
- AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec);
+ AttrListPtr NewCallPAL = AttrListPtr::get(F->getContext(), AttributesVec);
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
@@ -952,6 +1011,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
BB->getInstList().erase(RI);
}
+ // Patch the pointer to LLVM function in debug info descriptor.
+ FunctionDIMap::iterator DI = FunctionDIs.find(F);
+ if (DI != FunctionDIs.end())
+ DI->second.replaceFunction(NF);
+
// Now that the old function is dead, delete it.
F->eraseFromParent();
@@ -961,6 +1025,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
bool DAE::runOnModule(Module &M) {
bool Changed = false;
+ // Collect debug info descriptors for functions.
+ CollectFunctionDIs(M);
+
// First pass: Do a simple check to see if any functions can have their "..."
// removed. We can do this if they never call va_start. This loop cannot be
// fused with the next loop, because deleting a function invalidates
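A condensed sketch of the debug-info bookkeeping added above: the pass records each function's DISubprogram once per module via CollectFunctionDIs, then patches the descriptor whenever it replaces a function (OldFn/NewFn stand in for the pass's Fn/F and NF):

  // Filled once per module in runOnModule() by CollectFunctionDIs(M).
  FunctionDIMap FunctionDIs;

  // After cloning OldFn into NewFn, keep the subprogram descriptor accurate.
  FunctionDIMap::iterator DI = FunctionDIs.find(OldFn);
  if (DI != FunctionDIs.end())
    DI->second.replaceFunction(NewFn);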
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
index 4c7f0ed..6716deb 100644
--- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -51,32 +51,75 @@ namespace {
// Visit the GlobalVariables.
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
- if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) {
- I->setInitializer(0);
- } else {
+ bool Delete =
+ deleteStuff == (bool)Named.count(I) && !I->isDeclaration();
+ if (!Delete) {
if (I->hasAvailableExternallyLinkage())
continue;
if (I->getName() == "llvm.global_ctors")
continue;
}
- if (I->hasLocalLinkage())
+ bool Local = I->hasLocalLinkage();
+ if (Local)
I->setVisibility(GlobalValue::HiddenVisibility);
- I->setLinkage(GlobalValue::ExternalLinkage);
+
+ if (Local || Delete)
+ I->setLinkage(GlobalValue::ExternalLinkage);
+
+ if (Delete)
+ I->setInitializer(0);
}
// Visit the Functions.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) {
- I->deleteBody();
- } else {
+ bool Delete =
+ deleteStuff == (bool)Named.count(I) && !I->isDeclaration();
+ if (!Delete) {
if (I->hasAvailableExternallyLinkage())
continue;
}
- if (I->hasLocalLinkage())
+ bool Local = I->hasLocalLinkage();
+ if (Local)
I->setVisibility(GlobalValue::HiddenVisibility);
- I->setLinkage(GlobalValue::ExternalLinkage);
+
+ if (Local || Delete)
+ I->setLinkage(GlobalValue::ExternalLinkage);
+
+ if (Delete)
+ I->deleteBody();
+ }
+
+ // Visit the Aliases.
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E;) {
+ Module::alias_iterator CurI = I;
+ ++I;
+
+ if (CurI->hasLocalLinkage()) {
+ CurI->setVisibility(GlobalValue::HiddenVisibility);
+ CurI->setLinkage(GlobalValue::ExternalLinkage);
+ }
+
+ if (deleteStuff == (bool)Named.count(CurI)) {
+ Type *Ty = CurI->getType()->getElementType();
+
+ CurI->removeFromParent();
+ llvm::Value *Declaration;
+ if (FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
+ Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ CurI->getName(), &M);
+
+ } else {
+ Declaration =
+ new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
+ 0, CurI->getName());
+
+ }
+ CurI->replaceAllUsesWith(Declaration);
+ delete CurI;
+ }
}
return true;
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index f3f6228..18409f7 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -28,9 +28,9 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/UniqueVector.h"
#include "llvm/Support/InstIterator.h"
using namespace llvm;
@@ -212,10 +212,17 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
MadeChange = true;
// Clear out any existing attributes.
- F->removeAttribute(~0, Attribute::ReadOnly | Attribute::ReadNone);
+ AttrBuilder B;
+ B.addAttribute(Attributes::ReadOnly)
+ .addAttribute(Attributes::ReadNone);
+ F->removeAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(F->getContext(), B));
// Add in the new attribute.
- F->addAttribute(~0, ReadsMemory? Attribute::ReadOnly : Attribute::ReadNone);
+ B.clear();
+ B.addAttribute(ReadsMemory ? Attributes::ReadOnly : Attributes::ReadNone);
+ F->addAttribute(AttrListPtr::FunctionIndex,
+ Attributes::get(F->getContext(), B));
if (ReadsMemory)
++NumReadOnly;
@@ -276,8 +283,6 @@ namespace {
void tooManyUses() { Captured = true; }
- bool shouldExplore(Use *U) { return true; }
-
bool captured(Use *U) {
CallSite CS(U->getUser());
if (!CS.getInstruction()) { Captured = true; return true; }
@@ -352,6 +357,9 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
ArgumentGraph AG;
+ AttrBuilder B;
+ B.addAttribute(Attributes::NoCapture);
+
// Check each function in turn, determining which pointer arguments are not
// captured.
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
@@ -373,7 +381,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
A != E; ++A) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
- A->addAttr(Attribute::NoCapture);
+ A->addAttr(Attributes::get(F->getContext(), B));
++NumNoCapture;
Changed = true;
}
@@ -388,7 +396,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
if (!Tracker.Captured) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
- A->addAttr(Attribute::NoCapture);
+ A->addAttr(Attributes::get(F->getContext(), B));
++NumNoCapture;
Changed = true;
} else {
@@ -421,7 +429,9 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
// eg. "void f(int* x) { if (...) f(x); }"
if (ArgumentSCC[0]->Uses.size() == 1 &&
ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
- ArgumentSCC[0]->Definition->addAttr(Attribute::NoCapture);
+ ArgumentSCC[0]->
+ Definition->
+ addAttr(Attributes::get(ArgumentSCC[0]->Definition->getContext(), B));
++NumNoCapture;
Changed = true;
}
@@ -463,7 +473,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- A->addAttr(Attribute::NoCapture);
+ A->addAttr(Attributes::get(A->getContext(), B));
++NumNoCapture;
Changed = true;
}
@@ -476,13 +486,13 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
/// or a pointer that doesn't alias any other pointer visible to the caller.
bool FunctionAttrs::IsFunctionMallocLike(Function *F,
SmallPtrSet<Function*, 8> &SCCNodes) const {
- UniqueVector<Value *> FlowsToReturn;
+ SmallSetVector<Value *, 8> FlowsToReturn;
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
FlowsToReturn.insert(Ret->getReturnValue());
for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
- Value *RetVal = FlowsToReturn[i+1]; // UniqueVector[0] is reserved.
+ Value *RetVal = FlowsToReturn[i];
if (Constant *C = dyn_cast<Constant>(RetVal)) {
if (!C->isNullValue() && !isa<UndefValue>(C))
@@ -520,7 +530,7 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
case Instruction::Call:
case Instruction::Invoke: {
CallSite CS(RVI);
- if (CS.paramHasAttr(0, Attribute::NoAlias))
+ if (CS.paramHasAttr(0, Attributes::NoAlias))
break;
if (CS.getCalledFunction() &&
SCCNodes.count(CS.getCalledFunction()))
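The recurring pattern in this file: argument and function attributes are now built through AttrBuilder and attached as Attributes objects. A minimal sketch of the nocapture case shown above (A is a pointer Argument the analysis proved non-captured):

  AttrBuilder B;
  B.addAttribute(Attributes::NoCapture);

  if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr())
    A->addAttr(Attributes::get(A->getContext(), B));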
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 6d950d2..591278f 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -25,7 +25,7 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
@@ -83,7 +83,7 @@ namespace {
const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
- TargetData *TD;
+ DataLayout *TD;
TargetLibraryInfo *TLI;
};
}
@@ -225,6 +225,7 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
// Don't hack on volatile stores.
if (SI->isVolatile()) return true;
+
GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering());
// If this is a direct store to the global (i.e., the global is a scalar
@@ -234,6 +235,14 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
SI->getOperand(1))) {
Value *StoredVal = SI->getOperand(0);
+
+ if (Constant *C = dyn_cast<Constant>(StoredVal)) {
+ if (C->isThreadDependent()) {
+ // The stored value changes between threads; don't track it.
+ return true;
+ }
+ }
+
if (StoredVal == GV->getInitializer()) {
if (GS.StoredType < GlobalStatus::isInitializerStored)
GS.StoredType = GlobalStatus::isInitializerStored;
@@ -346,7 +355,7 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) {
/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
-static bool IsSafeComputationToRemove(Value *V) {
+static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
do {
if (isa<Constant>(V))
return true;
@@ -355,7 +364,7 @@ static bool IsSafeComputationToRemove(Value *V) {
if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
isa<GlobalValue>(V))
return false;
- if (isAllocationFn(V))
+ if (isAllocationFn(V, TLI))
return true;
Instruction *I = cast<Instruction>(V);
@@ -376,7 +385,8 @@ static bool IsSafeComputationToRemove(Value *V) {
/// of the global and clean up any that obviously don't assign the global a
/// value that isn't dynamically allocated.
///
-static bool CleanupPointerRootUsers(GlobalVariable *GV) {
+static bool CleanupPointerRootUsers(GlobalVariable *GV,
+ const TargetLibraryInfo *TLI) {
// A brief explanation of leak checkers. The goal is to find bugs where
// pointers are forgotten, causing an accumulating growth in memory
// usage over time. The common strategy for leak checkers is to whitelist the
@@ -432,18 +442,18 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV) {
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
Dead.clear();
- CleanupPointerRootUsers(GV);
+ CleanupPointerRootUsers(GV, TLI);
return true;
}
}
}
for (int i = 0, e = Dead.size(); i != e; ++i) {
- if (IsSafeComputationToRemove(Dead[i].first)) {
+ if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
Dead[i].second->eraseFromParent();
Instruction *I = Dead[i].first;
do {
- if (isAllocationFn(I))
+ if (isAllocationFn(I, TLI))
break;
Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
if (!J)
@@ -463,7 +473,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV) {
/// quick scan over the use list to clean up the easy and obvious cruft. This
/// returns true if it made a change.
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
- TargetData *TD, TargetLibraryInfo *TLI) {
+ DataLayout *TD, TargetLibraryInfo *TLI) {
bool Changed = false;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
User *U = *UI++;
@@ -655,7 +665,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
/// behavior of the program in a more fine-grained way. We have determined that
/// this transformation is safe already. We return the first global variable we
/// insert so that the caller can reprocess it.
-static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) {
// Make sure this global only has simple uses that we can SRA.
if (!GlobalUsersSafeToSRA(GV))
return 0;
@@ -931,7 +941,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
- TargetData *TD,
+ DataLayout *TD,
TargetLibraryInfo *TLI) {
bool Changed = false;
@@ -961,7 +971,9 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
// If we get here we could have other crazy uses that are transitively
// loaded.
assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
- isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser)) &&
+ isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
+ isa<BitCastInst>(GlobalUser) ||
+ isa<GetElementPtrInst>(GlobalUser)) &&
"Only expect load and stores!");
}
}
@@ -975,7 +987,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
// nor is the global.
if (AllNonStoreUsesGone) {
if (isLeakCheckerRoot(GV)) {
- Changed |= CleanupPointerRootUsers(GV);
+ Changed |= CleanupPointerRootUsers(GV, TLI);
} else {
Changed = true;
CleanupConstantGlobalUsers(GV, 0, TD, TLI);
@@ -993,7 +1005,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
static void ConstantPropUsersOf(Value *V,
- TargetData *TD, TargetLibraryInfo *TLI) {
+ DataLayout *TD, TargetLibraryInfo *TLI) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
if (Constant *NewC = ConstantFoldInstruction(I, TD, TLI)) {
@@ -1016,7 +1028,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
CallInst *CI,
Type *AllocTy,
ConstantInt *NElements,
- TargetData *TD,
+ DataLayout *TD,
TargetLibraryInfo *TLI) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
@@ -1465,9 +1477,10 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
- Value *NElems, TargetData *TD) {
+ Value *NElems, DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
- Type *MAT = getMallocAllocatedType(CI);
+ Type *MAT = getMallocAllocatedType(CI, TLI);
StructType *STy = cast<StructType>(MAT);
// There is guaranteed to be at least one use of the malloc (storing
@@ -1656,7 +1669,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
Type *AllocTy,
AtomicOrdering Ordering,
Module::global_iterator &GVI,
- TargetData *TD,
+ DataLayout *TD,
TargetLibraryInfo *TLI) {
if (!TD)
return false;
@@ -1688,7 +1701,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// This eliminates dynamic allocation, avoids an indirection accessing the
// data, and exposes the resultant global to further GlobalOpt.
// We cannot optimize the malloc if we cannot determine malloc array size.
- Value *NElems = getMallocArraySize(CI, TD, true);
+ Value *NElems = getMallocArraySize(CI, TD, TLI, true);
if (!NElems)
return false;
@@ -1725,7 +1738,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
- if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
@@ -1742,7 +1755,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CI = cast<CallInst>(Malloc);
}
- GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true), TD);
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, TLI, true),
+ TD, TLI);
return true;
}
@@ -1754,7 +1768,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
AtomicOrdering Ordering,
Module::global_iterator &GVI,
- TargetData *TD, TargetLibraryInfo *TLI) {
+ DataLayout *TD, TargetLibraryInfo *TLI) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1771,8 +1785,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
// Optimize away any trapping uses of the loaded value.
if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI))
return true;
- } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
- Type *MallocType = getMallocAllocatedType(CI);
+ } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
+ Type *MallocType = getMallocAllocatedType(CI, TLI);
if (MallocType &&
TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
TD, TLI))
@@ -1964,7 +1978,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
bool Changed;
if (isLeakCheckerRoot(GV)) {
// Delete any constant stores to the global.
- Changed = CleanupPointerRootUsers(GV);
+ Changed = CleanupPointerRootUsers(GV, TLI);
} else {
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
@@ -1997,7 +2011,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
++NumMarked;
return true;
} else if (!GV->getInitializer()->getType()->isSingleValueType()) {
- if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
+ if (DataLayout *TD = getAnalysisIfAvailable<DataLayout>())
if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
GVI = FirstNewGV; // Don't skip the newly produced globals!
return true;
@@ -2056,25 +2070,26 @@ static void ChangeCalleesToFastCall(Function *F) {
}
}
-static AttrListPtr StripNest(const AttrListPtr &Attrs) {
+static AttrListPtr StripNest(LLVMContext &C, const AttrListPtr &Attrs) {
for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
- if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0)
+ if (!Attrs.getSlot(i).Attrs.hasAttribute(Attributes::Nest))
continue;
// There can be only one.
- return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest);
+ return Attrs.removeAttr(C, Attrs.getSlot(i).Index,
+ Attributes::get(C, Attributes::Nest));
}
return Attrs;
}
static void RemoveNestAttribute(Function *F) {
- F->setAttributes(StripNest(F->getAttributes()));
+ F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
if (isa<BlockAddress>(*UI))
continue;
CallSite User(cast<Instruction>(*UI));
- User.setAttributes(StripNest(User.getAttributes()));
+ User.setAttributes(StripNest(F->getContext(), User.getAttributes()));
}
}
@@ -2103,7 +2118,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
Changed = true;
}
- if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
+ if (F->getAttributes().hasAttrSomewhere(Attributes::Nest) &&
!F->hasAddressTaken()) {
// The function is not used by a trampoline intrinsic, so it is safe
// to remove the 'nest' attribute.
@@ -2251,7 +2266,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
SmallPtrSet<Constant*, 8> &SimpleConstants,
- const TargetData *TD);
+ const DataLayout *TD);
/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
@@ -2264,7 +2279,7 @@ isSimpleEnoughValueToCommit(Constant *C,
/// time.
static bool isSimpleEnoughValueToCommitHelper(Constant *C,
SmallPtrSet<Constant*, 8> &SimpleConstants,
- const TargetData *TD) {
+ const DataLayout *TD) {
// Simple integer, undef, constant aggregate zero, global addresses, etc are
// all supported.
if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
@@ -2319,7 +2334,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
SmallPtrSet<Constant*, 8> &SimpleConstants,
- const TargetData *TD) {
+ const DataLayout *TD) {
// If we already checked this constant, we win.
if (!SimpleConstants.insert(C)) return true;
// Check the constant.
@@ -2450,7 +2465,7 @@ namespace {
/// Once an evaluation call fails, the evaluation object should not be reused.
class Evaluator {
public:
- Evaluator(const TargetData *TD, const TargetLibraryInfo *TLI)
+ Evaluator(const DataLayout *TD, const TargetLibraryInfo *TLI)
: TD(TD), TLI(TLI) {
ValueStack.push_back(new DenseMap<Value*, Constant*>);
}
@@ -2531,7 +2546,7 @@ private:
/// simple enough to live in a static initializer of a global.
SmallPtrSet<Constant*, 8> SimpleConstants;
- const TargetData *TD;
+ const DataLayout *TD;
const TargetLibraryInfo *TLI;
};
@@ -2869,7 +2884,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
/// we can. Return true if we can, false otherwise.
-static bool EvaluateStaticConstructor(Function *F, const TargetData *TD,
+static bool EvaluateStaticConstructor(Function *F, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
// Call the function.
Evaluator Eval(TD, TLI);
@@ -3110,7 +3125,7 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
bool GlobalOpt::runOnModule(Module &M) {
bool Changed = false;
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
TLI = &getAnalysis<TargetLibraryInfo>();
// Try to find the llvm.globalctors list.
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index 6233922..5d563d8 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -1,4 +1,4 @@
-//===-- Scalar.cpp --------------------------------------------------------===//
+//===-- IPO.cpp -----------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -95,7 +95,10 @@ void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) {
}
void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
- unwrap(PM)->add(createInternalizePass(AllButMain != 0));
+ std::vector<const char *> Export;
+ if (AllButMain)
+ Export.push_back("main");
+ unwrap(PM)->add(createInternalizePass(Export));
}
void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
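With the boolean "all but main" constructor gone (see the Internalize.cpp hunks later in this diff), the C API shim now expresses that mode as an explicit export list. The equivalent C++-side usage would be roughly (PM is any pass manager accepting module passes):

  std::vector<const char *> Export;
  Export.push_back("main");                 // keep main externally visible
  PM.add(createInternalizePass(Export));    // internalize everything else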
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
index 664ddf6..b1c36c1 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -23,7 +23,7 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
@@ -65,7 +65,7 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
/// \brief Minimal filter to detect invalid constructs for inlining.
static bool isInlineViable(Function &F) {
- bool ReturnsTwice = F.hasFnAttr(Attribute::ReturnsTwice);
+ bool ReturnsTwice =F.getFnAttributes().hasAttribute(Attributes::ReturnsTwice);
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
// Disallow inlining of functions which contain an indirect branch.
if (isa<IndirectBrInst>(BI->getTerminator()))
@@ -114,7 +114,7 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
if (Callee->isDeclaration()) return InlineCost::getNever();
// Return never for anything not marked as always inline.
- if (!Callee->hasFnAttr(Attribute::AlwaysInline))
+ if (!Callee->getFnAttributes().hasAttribute(Attributes::AlwaysInline))
return InlineCost::getNever();
// Do some minimal analysis to preclude non-viable functions.
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
index 50038d8..bf0b1f9 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -22,7 +22,7 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
using namespace llvm;
@@ -62,7 +62,7 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
// doInitialization - Initializes the vector of functions that have been
// annotated with the noinline attribute.
bool SimpleInliner::doInitialization(CallGraph &CG) {
- CA.setTargetData(getAnalysisIfAvailable<TargetData>());
+ CA.setDataLayout(getAnalysisIfAvailable<DataLayout>());
return false;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 712888a..abcb25f 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -19,7 +19,8 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -92,11 +93,11 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
// If the inlined function had a higher stack protection level than the
// calling function, then bump up the caller's stack protection level.
- if (Callee->hasFnAttr(Attribute::StackProtectReq))
- Caller->addFnAttr(Attribute::StackProtectReq);
- else if (Callee->hasFnAttr(Attribute::StackProtect) &&
- !Caller->hasFnAttr(Attribute::StackProtectReq))
- Caller->addFnAttr(Attribute::StackProtect);
+ if (Callee->getFnAttributes().hasAttribute(Attributes::StackProtectReq))
+ Caller->addFnAttr(Attributes::StackProtectReq);
+ else if (Callee->getFnAttributes().hasAttribute(Attributes::StackProtect) &&
+ !Caller->getFnAttributes().hasAttribute(Attributes::StackProtectReq))
+ Caller->addFnAttr(Attributes::StackProtect);
// Look at all of the allocas that we inlined through this call site. If we
// have already inlined other allocas through other calls into this function,
@@ -208,14 +209,15 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
// would decrease the threshold.
Function *Caller = CS.getCaller();
bool OptSize = Caller && !Caller->isDeclaration() &&
- Caller->hasFnAttr(Attribute::OptimizeForSize);
- if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && OptSizeThreshold < thres)
+ Caller->getFnAttributes().hasAttribute(Attributes::OptimizeForSize);
+ if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
+ OptSizeThreshold < thres)
thres = OptSizeThreshold;
// Listen to the inlinehint attribute when it would increase the threshold.
Function *Callee = CS.getCalledFunction();
bool InlineHint = Callee && !Callee->isDeclaration() &&
- Callee->hasFnAttr(Attribute::InlineHint);
+ Callee->getFnAttributes().hasAttribute(Attributes::InlineHint);
if (InlineHint && HintThreshold > thres)
thres = HintThreshold;
@@ -338,7 +340,8 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraph>();
- const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
SmallPtrSet<Function*, 8> SCCFunctions;
DEBUG(dbgs() << "Inliner visiting SCC:");
@@ -417,7 +420,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// just delete the call instead of trying to inline it, regardless of
// size. This happens because IPSCCP propagates the result out of the
// call and then we're left with the dead call.
- if (isInstructionTriviallyDead(CS.getInstruction())) {
+ if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) {
DEBUG(dbgs() << " -> Deleting dead call: "
<< *CS.getInstruction() << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
@@ -530,7 +533,8 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// Handle the case when this function is called and we only want to care
// about always-inline functions. This is a bit of a hack to share code
// between here and the InlineAlways pass.
- if (AlwaysInlineOnly && !F->hasFnAttr(Attribute::AlwaysInline))
+ if (AlwaysInlineOnly &&
+ !F->getFnAttributes().hasAttribute(Attributes::AlwaysInline))
continue;
// If the only remaining users of the function are dead constants, remove
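Attribute queries in the inliner follow the same migration: boolean hasFnAttr(Attribute::X) calls become lookups through getFnAttributes(). The threshold logic above, condensed:

  bool OptSize = Caller && !Caller->isDeclaration() &&
      Caller->getFnAttributes().hasAttribute(Attributes::OptimizeForSize);
  bool InlineHint = Callee && !Callee->isDeclaration() &&
      Callee->getFnAttributes().hasAttribute(Attributes::InlineHint);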
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
index fb5869e..aa629cc 100644
--- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass loops over all of the functions in the input module, looking for a
-// main function. If a main function is found, all other functions and all
-// global variables with initializers are marked as internal.
+// This pass loops over all of the functions and variables in the input module.
+// If the function or variable is not in the list of external names given to
+// the pass it is marked as internal.
//
//===----------------------------------------------------------------------===//
@@ -45,12 +45,9 @@ APIList("internalize-public-api-list", cl::value_desc("list"),
namespace {
class InternalizePass : public ModulePass {
std::set<std::string> ExternalNames;
- /// If no api symbols were specified and a main function is defined,
- /// assume the main function is the only API
- bool AllButMain;
public:
static char ID; // Pass identification, replacement for typeid
- explicit InternalizePass(bool AllButMain = true);
+ explicit InternalizePass();
explicit InternalizePass(const std::vector <const char *>& exportList);
void LoadFile(const char *Filename);
virtual bool runOnModule(Module &M);
@@ -66,8 +63,8 @@ char InternalizePass::ID = 0;
INITIALIZE_PASS(InternalizePass, "internalize",
"Internalize Global Symbols", false, false)
-InternalizePass::InternalizePass(bool AllButMain)
- : ModulePass(ID), AllButMain(AllButMain){
+InternalizePass::InternalizePass()
+ : ModulePass(ID) {
initializeInternalizePassPass(*PassRegistry::getPassRegistry());
if (!APIFile.empty()) // If a filename is specified, use it.
LoadFile(APIFile.c_str());
@@ -76,7 +73,7 @@ InternalizePass::InternalizePass(bool AllButMain)
}
InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
- : ModulePass(ID), AllButMain(false){
+ : ModulePass(ID){
initializeInternalizePassPass(*PassRegistry::getPassRegistry());
for(std::vector<const char *>::const_iterator itr = exportList.begin();
itr != exportList.end(); itr++) {
@@ -103,23 +100,6 @@ void InternalizePass::LoadFile(const char *Filename) {
bool InternalizePass::runOnModule(Module &M) {
CallGraph *CG = getAnalysisIfAvailable<CallGraph>();
CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
-
- if (ExternalNames.empty()) {
- // Return if we're not in 'all but main' mode and have no external api
- if (!AllButMain)
- return false;
- // If no list or file of symbols was specified, check to see if there is a
- // "main" symbol defined in the module. If so, use it, otherwise do not
- // internalize the module, it must be a library or something.
- //
- Function *MainFunc = M.getFunction("main");
- if (MainFunc == 0 || MainFunc->isDeclaration())
- return false; // No main found, must be a library...
-
- // Preserve main, internalize all else.
- ExternalNames.insert(MainFunc->getName());
- }
-
bool Changed = false;
// Never internalize functions which code-gen might insert.
@@ -189,8 +169,8 @@ bool InternalizePass::runOnModule(Module &M) {
return Changed;
}
-ModulePass *llvm::createInternalizePass(bool AllButMain) {
- return new InternalizePass(AllButMain);
+ModulePass *llvm::createInternalizePass() {
+ return new InternalizePass();
}
ModulePass *llvm::createInternalizePass(const std::vector <const char *> &el) {
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 9f70f66..44283dd 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -63,7 +63,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include <vector>
using namespace llvm;
@@ -92,19 +92,19 @@ static unsigned profileFunction(const Function *F) {
namespace {
/// ComparableFunction - A struct that pairs together functions with a
-/// TargetData so that we can keep them together as elements in the DenseSet.
+/// DataLayout so that we can keep them together as elements in the DenseSet.
class ComparableFunction {
public:
static const ComparableFunction EmptyKey;
static const ComparableFunction TombstoneKey;
- static TargetData * const LookupOnly;
+ static DataLayout * const LookupOnly;
- ComparableFunction(Function *Func, TargetData *TD)
+ ComparableFunction(Function *Func, DataLayout *TD)
: Func(Func), Hash(profileFunction(Func)), TD(TD) {}
Function *getFunc() const { return Func; }
unsigned getHash() const { return Hash; }
- TargetData *getTD() const { return TD; }
+ DataLayout *getTD() const { return TD; }
// Drops AssertingVH reference to the function. Outside of debug mode, this
// does nothing.
@@ -120,13 +120,13 @@ private:
AssertingVH<Function> Func;
unsigned Hash;
- TargetData *TD;
+ DataLayout *TD;
};
const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
const ComparableFunction ComparableFunction::TombstoneKey =
ComparableFunction(1);
-TargetData *const ComparableFunction::LookupOnly = (TargetData*)(-1);
+DataLayout *const ComparableFunction::LookupOnly = (DataLayout*)(-1);
}
@@ -150,12 +150,12 @@ namespace llvm {
namespace {
/// FunctionComparator - Compares two functions to determine whether or not
-/// they will generate machine code with the same behaviour. TargetData is
+/// they will generate machine code with the same behaviour. DataLayout is
/// used if available. The comparator always fails conservatively (erring on the
/// side of claiming that two functions are different).
class FunctionComparator {
public:
- FunctionComparator(const TargetData *TD, const Function *F1,
+ FunctionComparator(const DataLayout *TD, const Function *F1,
const Function *F2)
: F1(F1), F2(F2), TD(TD) {}
@@ -190,7 +190,7 @@ private:
// The two functions undergoing comparison.
const Function *F1, *F2;
- const TargetData *TD;
+ const DataLayout *TD;
DenseMap<const Value *, const Value *> id_map;
DenseSet<const Value *> seen_values;
@@ -591,8 +591,8 @@ private:
/// to modify it.
FnSetType FnSet;
- /// TargetData for more accurate GEP comparisons. May be NULL.
- TargetData *TD;
+ /// DataLayout for more accurate GEP comparisons. May be NULL.
+ DataLayout *TD;
/// Whether or not the target supports global aliases.
bool HasGlobalAliases;
@@ -609,7 +609,7 @@ ModulePass *llvm::createMergeFunctionsPass() {
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 43b4ab5..05253fc 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -33,13 +33,21 @@
using namespace llvm;
static cl::opt<bool>
-RunVectorization("vectorize", cl::desc("Run vectorization passes"));
+RunLoopVectorization("vectorize-loops",
+ cl::desc("Run the Loop vectorization passes"));
+
+static cl::opt<bool>
+RunBBVectorization("vectorize", cl::desc("Run the BB vectorization passes"));
static cl::opt<bool>
UseGVNAfterVectorization("use-gvn-after-vectorization",
cl::init(false), cl::Hidden,
cl::desc("Run GVN instead of Early CSE after vectorization passes"));
+static cl::opt<bool> UseNewSROA("use-new-sroa",
+ cl::init(true), cl::Hidden,
+ cl::desc("Enable the new, experimental SROA pass"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -48,7 +56,8 @@ PassManagerBuilder::PassManagerBuilder() {
DisableSimplifyLibCalls = false;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
- Vectorize = RunVectorization;
+ Vectorize = RunBBVectorization;
+ LoopVectorize = RunLoopVectorization;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -100,7 +109,10 @@ void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
addInitialAliasAnalysisPasses(FPM);
FPM.add(createCFGSimplificationPass());
- FPM.add(createScalarReplAggregatesPass());
+ if (UseNewSROA)
+ FPM.add(createSROAPass());
+ else
+ FPM.add(createScalarReplAggregatesPass());
FPM.add(createEarlyCSEPass());
FPM.add(createLowerExpectIntrinsicPass());
}
@@ -112,6 +124,14 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(Inliner);
Inliner = 0;
}
+
+ // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
+ // pass manager, but we don't want to add extensions into that pass manager.
+ // To prevent this we must insert a no-op module pass to reset the pass
+ // manager to get the same behavior as EP_OptimizerLast in non-O0 builds.
+ if (!GlobalExtensions->empty() || !Extensions.empty())
+ MPM.add(createBarrierNoopPass());
+
addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
return;
}
@@ -147,7 +167,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// Start of function pass.
// Break up aggregate allocas, using SSAUpdater.
- MPM.add(createScalarReplAggregatesPass(-1, false));
+ if (UseNewSROA)
+ MPM.add(createSROAPass(/*RequiresDomTree*/ false));
+ else
+ MPM.add(createScalarReplAggregatesPass(-1, false));
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
if (!DisableSimplifyLibCalls)
MPM.add(createSimplifyLibCallsPass()); // Library Call Optimizations
@@ -166,6 +189,12 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
+
+ if (LoopVectorize) {
+ MPM.add(createLoopVectorizePass());
+ MPM.add(createLICMPass());
+ }
+
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass()); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
@@ -201,13 +230,12 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// FIXME: We shouldn't bother with this anymore.
MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
- // GlobalOpt already deletes dead functions and globals, at -O3 try a
+ // GlobalOpt already deletes dead functions and globals, at -O2 try a
// late pass of GlobalDCE. It is capable of deleting dead cycles.
- if (OptLevel > 2)
+ if (OptLevel > 1) {
MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
-
- if (OptLevel > 1)
MPM.add(createConstantMergePass()); // Merge dup global constants
+ }
}
addExtensionsToPM(EP_OptimizerLast, MPM);
}
@@ -222,8 +250,11 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// Now that composite has been compiled, scan through the module, looking
// for a main function. If main is defined, mark all other functions
// internal.
- if (Internalize)
- PM.add(createInternalizePass(true));
+ if (Internalize) {
+ std::vector<const char*> E;
+ E.push_back("main");
+ PM.add(createInternalizePass(E));
+ }
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
@@ -265,7 +296,10 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createInstructionCombiningPass());
PM.add(createJumpThreadingPass());
// Break up allocas
- PM.add(createScalarReplAggregatesPass());
+ if (UseNewSROA)
+ PM.add(createSROAPass());
+ else
+ PM.add(createScalarReplAggregatesPass());
// Run a few AA driven optimizations here and now, to cleanup the code.
PM.add(createFunctionAttrsPass()); // Add nocapture.
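Editor's note: the hunks above split the old -vectorize flag into -vectorize-loops and -vectorize and thread a new LoopVectorize field through the builder. A minimal embedding sketch, assuming the 3.2-era PassManagerBuilder interface these hunks modify:

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"

    // Build the standard -O2 pipeline with the new loop vectorizer enabled,
    // independent of the -vectorize-loops command-line default.
    static void buildPipeline(llvm::PassManager &MPM) {
      llvm::PassManagerBuilder PMB;
      PMB.OptLevel = 2;
      PMB.LoopVectorize = true;
      PMB.populateModulePassManager(MPM);
    }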
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index c8cc8fd..fb4ecbf 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -137,16 +137,18 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
// If the SCC doesn't unwind or doesn't throw, note this fact.
if (!SCCMightUnwind || !SCCMightReturn)
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Attributes NewAttributes = Attribute::None;
+ AttrBuilder NewAttributes;
if (!SCCMightUnwind)
- NewAttributes |= Attribute::NoUnwind;
+ NewAttributes.addAttribute(Attributes::NoUnwind);
if (!SCCMightReturn)
- NewAttributes |= Attribute::NoReturn;
+ NewAttributes.addAttribute(Attributes::NoReturn);
Function *F = (*I)->getFunction();
const AttrListPtr &PAL = F->getAttributes();
- const AttrListPtr &NPAL = PAL.addAttr(~0, NewAttributes);
+ const AttrListPtr &NPAL = PAL.addAttr(F->getContext(), ~0,
+ Attributes::get(F->getContext(),
+ NewAttributes));
if (PAL != NPAL) {
MadeChange = true;
F->setAttributes(NPAL);
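Editor's note: the PruneEH hunk above is one instance of the tree-wide move from OR-ing Attributes bits to building them with AttrBuilder. A small standalone sketch of the new idiom, using only the calls that appear in the hunk itself:

    #include "llvm/Attributes.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    // Collect the desired attribute bits, then materialize an Attributes
    // value tied to a specific LLVMContext.
    static Attributes makeNoUnwindNoReturn(LLVMContext &Ctx) {
      AttrBuilder B;
      B.addAttribute(Attributes::NoUnwind);
      B.addAttribute(Attributes::NoReturn);
      return Attributes::get(Ctx, B);
    }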
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
index 0d5ef90..7467eca 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
@@ -18,10 +18,11 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/TargetFolder.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
namespace llvm {
class CallSite;
- class TargetData;
+ class DataLayout;
class TargetLibraryInfo;
class DbgDeclareInst;
class MemIntrinsic;
@@ -71,9 +72,10 @@ public:
class LLVM_LIBRARY_VISIBILITY InstCombiner
: public FunctionPass,
public InstVisitor<InstCombiner, Instruction*> {
- TargetData *TD;
+ DataLayout *TD;
TargetLibraryInfo *TLI;
bool MadeIRChange;
+ LibCallSimplifier *Simplifier;
public:
/// Worklist - All of the instructions that need to be simplified.
InstCombineWorklist Worklist;
@@ -95,7 +97,7 @@ public:
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- TargetData *getTargetData() const { return TD; }
+ DataLayout *getDataLayout() const { return TD; }
TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
@@ -218,7 +220,7 @@ private:
Type *Ty);
Instruction *visitCallSite(CallSite CS);
- Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD);
+ Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *TD);
bool transformConstExprCastCall(CallSite CS);
Instruction *transformCallThroughTrampoline(CallSite CS,
IntrinsicInst *Tramp);
@@ -365,6 +367,10 @@ private:
Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);
+
+ /// Descale - Return a value X such that Val = X * Scale, or null if none. If
+ /// the multiplication is known not to overflow then NoSignedWrap is set.
+ Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
};
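Editor's note: the new Descale helper declared above factors a known multiplier out of an integer expression. A worked numeric example of the contract, mirroring the X*(Y*(Z*4)) example used later in InstructionCombining.cpp (illustration only):

    #include <cassert>

    int main() {
      int X = 7, Y = -3, Z = 5;
      int Val = X * (Y * (Z * 4));   // the scaled expression
      int Descaled = X * (Y * Z);    // what Descale(Val, 4) is meant to return
      assert(Val == Descaled * 4);   // Val == Descale(Val, Scale) * Scale
      return 0;
    }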
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 99b62f8..d8257e6 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -13,7 +13,7 @@
#include "InstCombine.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cbe1ca4..48f2704 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -13,7 +13,7 @@
#include "InstCombine.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -29,6 +29,26 @@ static Type *getPromotedType(Type *Ty) {
return Ty;
}
+/// reduceToSingleValueType - Given an aggregate type which ultimately holds a
+/// single scalar element, like {{{type}}} or [1 x type], return that element type.
+static Type *reduceToSingleValueType(Type *T) {
+ while (!T->isSingleValueType()) {
+ if (StructType *STy = dyn_cast<StructType>(T)) {
+ if (STy->getNumElements() == 1)
+ T = STy->getElementType(0);
+ else
+ break;
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
+ if (ATy->getNumElements() == 1)
+ T = ATy->getElementType();
+ else
+ break;
+ } else
+ break;
+ }
+
+ return T;
+}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
@@ -74,35 +94,37 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// dest address will be promotable. See if we can find a better type than the
// integer datatype.
Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
+ MDNode *CopyMD = 0;
if (StrippedDest != MI->getArgOperand(0)) {
Type *SrcETy = cast<PointerType>(StrippedDest->getType())
->getElementType();
if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
// The SrcETy might be something like {{{double}}} or [1 x double]. Rip
// down through these levels if so.
- while (!SrcETy->isSingleValueType()) {
- if (StructType *STy = dyn_cast<StructType>(SrcETy)) {
- if (STy->getNumElements() == 1)
- SrcETy = STy->getElementType(0);
- else
- break;
- } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
- if (ATy->getNumElements() == 1)
- SrcETy = ATy->getElementType();
- else
- break;
- } else
- break;
- }
+ SrcETy = reduceToSingleValueType(SrcETy);
if (SrcETy->isSingleValueType()) {
NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
+
+ // If the memcpy has metadata describing the members, see if we can
+ // get the TBAA tag describing our copy.
+ if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
+ if (M->getNumOperands() == 3 &&
+ M->getOperand(0) &&
+ isa<ConstantInt>(M->getOperand(0)) &&
+ cast<ConstantInt>(M->getOperand(0))->isNullValue() &&
+ M->getOperand(1) &&
+ isa<ConstantInt>(M->getOperand(1)) &&
+ cast<ConstantInt>(M->getOperand(1))->getValue() == Size &&
+ M->getOperand(2) &&
+ isa<MDNode>(M->getOperand(2)))
+ CopyMD = cast<MDNode>(M->getOperand(2));
+ }
}
}
}
-
// If the memcpy/memmove provides better alignment info than we can
// infer, use it.
SrcAlign = std::max(SrcAlign, CopyAlign);
@@ -112,8 +134,12 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
L->setAlignment(SrcAlign);
+ if (CopyMD)
+ L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
S->setAlignment(DstAlign);
+ if (CopyMD)
+ S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
// Set the size of the copy to 0, it will be deleted on the next iteration.
MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
@@ -168,7 +194,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
- if (isFreeCall(&CI))
+ if (isFreeCall(&CI, TLI))
return visitFree(CI);
// If the caller function is nounwind, mark the call as nounwind, even if the
@@ -243,7 +269,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
default: break;
case Intrinsic::objectsize: {
uint64_t Size;
- if (getObjectSize(II->getArgOperand(0), Size, TD))
+ if (getObjectSize(II->getArgOperand(0), Size, TD, TLI))
return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
return 0;
}
@@ -731,7 +757,7 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
const CastInst * const CI,
- const TargetData * const TD,
+ const DataLayout * const TD,
const int ix) {
if (!CI->isLosslessCast())
return false;
@@ -752,49 +778,17 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
return true;
}
-namespace {
-class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls {
- InstCombiner *IC;
-protected:
- void replaceCall(Value *With) {
- NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
- }
- bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
- if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
- return true;
- if (ConstantInt *SizeCI =
- dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
- if (SizeCI->isAllOnesValue())
- return true;
- if (isString) {
- uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
- // If the length is 0 we don't know how long it is and so we can't
- // remove the check.
- if (Len == 0) return false;
- return SizeCI->getZExtValue() >= Len;
- }
- if (ConstantInt *Arg = dyn_cast<ConstantInt>(
- CI->getArgOperand(SizeArgOp)))
- return SizeCI->getZExtValue() >= Arg->getZExtValue();
- }
- return false;
- }
-public:
- InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { }
- Instruction *NewInstruction;
-};
-} // end anonymous namespace
-
// Try to fold some different type of calls here.
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
-Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) {
if (CI->getCalledFunction() == 0) return 0;
- InstCombineFortifiedLibCalls Simplifier(this);
- Simplifier.fold(CI, TD, TLI);
- return Simplifier.NewInstruction;
+ if (Value *With = Simplifier->optimizeCall(CI))
+ return ReplaceInstUsesWith(*CI, With);
+
+ return 0;
}
static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
@@ -877,7 +871,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) {
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
- if (isAllocLikeFn(CS.getInstruction()))
+ if (isAllocLikeFn(CS.getInstruction(), TLI))
return visitAllocSite(*CS.getInstruction());
bool Changed = false;
@@ -961,7 +955,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
Changed = true;
}
- // Try to optimize the call if possible, we require TargetData for most of
+ // Try to optimize the call if possible, we require DataLayout for most of
// this. None of these calls are seen as possibly dead so go ahead and
// delete the instruction now.
if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
@@ -1013,8 +1007,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Cannot transform this return value.
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
- Attributes RAttrs = CallerPAL.getRetAttributes();
- if (RAttrs & Attribute::typeIncompatible(NewRetTy))
+ AttrBuilder RAttrs = CallerPAL.getRetAttributes();
+ if (RAttrs.hasAttributes(Attributes::typeIncompatible(NewRetTy)))
return false; // Attribute not compatible with transformed value.
}
@@ -1044,12 +1038,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Cannot transform this parameter value.
Attributes Attrs = CallerPAL.getParamAttributes(i + 1);
- if (Attrs & Attribute::typeIncompatible(ParamTy))
+ if (AttrBuilder(Attrs).
+ hasAttributes(Attributes::typeIncompatible(ParamTy)))
return false; // Attribute not compatible with transformed value.
// If the parameter is passed as a byval argument, then we have to have a
// sized type and the sized type has to have the same size as the old type.
- if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
+ if (ParamTy != ActTy && Attrs.hasAttribute(Attributes::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
return false;
@@ -1101,7 +1096,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
break;
Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
- if (PAttrs & Attribute::VarArgsIncompatible)
+ if (PAttrs.hasIncompatibleWithVarArgsAttrs())
return false;
}
@@ -1114,15 +1109,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
attrVec.reserve(NumCommonArgs);
// Get any return attributes.
- Attributes RAttrs = CallerPAL.getRetAttributes();
+ AttrBuilder RAttrs = CallerPAL.getRetAttributes();
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
- RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
+ RAttrs.removeAttributes(Attributes::typeIncompatible(NewRetTy));
// Add the new return attributes.
- if (RAttrs)
- attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
+ if (RAttrs.hasAttributes())
+ attrVec.push_back(
+ AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ Attributes::get(FT->getContext(), RAttrs)));
AI = CS.arg_begin();
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
@@ -1136,7 +1133,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
// Add any parameter attributes.
- if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ Attributes PAttrs = CallerPAL.getParamAttributes(i + 1);
+ if (PAttrs.hasAttributes())
attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
}
@@ -1164,19 +1162,23 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
// Add any parameter attributes.
- if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ Attributes PAttrs = CallerPAL.getParamAttributes(i + 1);
+ if (PAttrs.hasAttributes())
attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
}
}
}
- if (Attributes FnAttrs = CallerPAL.getFnAttributes())
- attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ Attributes FnAttrs = CallerPAL.getFnAttributes();
+ if (FnAttrs.hasAttributes())
+ attrVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ FnAttrs));
if (NewRetTy->isVoidTy())
Caller->setName(""); // Void type should not have a name.
- const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec);
+ const AttrListPtr &NewCallerPAL = AttrListPtr::get(Callee->getContext(),
+ attrVec);
Instruction *NC;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
@@ -1240,8 +1242,9 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// If the call already has the 'nest' attribute somewhere then give up -
// otherwise 'nest' would occur twice after splicing in the chain.
- if (Attrs.hasAttrSomewhere(Attribute::Nest))
- return 0;
+ for (unsigned I = 0, E = Attrs.getNumAttrs(); I != E; ++I)
+ if (Attrs.getAttributesAtIndex(I).hasAttribute(Attributes::Nest))
+ return 0;
assert(Tramp &&
"transformCallThroughTrampoline called with incorrect CallSite.");
@@ -1254,12 +1257,12 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
if (!NestAttrs.isEmpty()) {
unsigned NestIdx = 1;
Type *NestTy = 0;
- Attributes NestAttr = Attribute::None;
+ Attributes NestAttr;
// Look for a parameter marked with the 'nest' attribute.
for (FunctionType::param_iterator I = NestFTy->param_begin(),
E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
- if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
+ if (NestAttrs.getParamAttributes(NestIdx).hasAttribute(Attributes::Nest)){
// Record the parameter type and any other attributes.
NestTy = *I;
NestAttr = NestAttrs.getParamAttributes(NestIdx);
@@ -1278,8 +1281,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// mean appending it. Likewise for attributes.
// Add any result attributes.
- if (Attributes Attr = Attrs.getRetAttributes())
- NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
+ Attributes Attr = Attrs.getRetAttributes();
+ if (Attr.hasAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+ Attr));
{
unsigned Idx = 1;
@@ -1299,7 +1304,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// Add the original argument and attributes.
NewArgs.push_back(*I);
- if (Attributes Attr = Attrs.getParamAttributes(Idx))
+ Attr = Attrs.getParamAttributes(Idx);
+ if (Attr.hasAttributes())
NewAttrs.push_back
(AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
@@ -1308,8 +1314,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
}
// Add any function attributes.
- if (Attributes Attr = Attrs.getFnAttributes())
- NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
+ Attr = Attrs.getFnAttributes();
+ if (Attr.hasAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+ Attr));
// The trampoline may have been bitcast to a bogus type (FTy).
// Handle this by synthesizing a new function type, equal to FTy
@@ -1348,7 +1356,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
NestF->getType() == PointerType::getUnqual(NewFTy) ?
NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
- const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs);
+ const AttrListPtr &NewPAL = AttrListPtr::get(FTy->getContext(), NewAttrs);
Instruction *NewCaller;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 555b442..bb59db8 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -13,7 +13,7 @@
#include "InstCombine.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
@@ -78,7 +78,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
/// try to eliminate the cast by moving the type information into the alloc.
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst &AI) {
- // This requires TargetData to get the alloca alignment and size information.
+ // This requires DataLayout to get the alloca alignment and size information.
if (!TD) return 0;
PointerType *PTy = cast<PointerType>(CI.getType());
@@ -229,7 +229,7 @@ isEliminableCastPair(
const CastInst *CI, ///< The first cast instruction
unsigned opcode, ///< The opcode of the second cast instruction
Type *DstTy, ///< The target type for the second cast instruction
- TargetData *TD ///< The target data for pointer size
+ DataLayout *TD ///< The target data for pointer size
) {
Type *SrcTy = CI->getOperand(0)->getType(); // A from above
@@ -238,17 +238,20 @@ isEliminableCastPair(
// Get the opcodes of the two Cast instructions
Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
Instruction::CastOps secondOp = Instruction::CastOps(opcode);
-
+ Type *SrcIntPtrTy = TD && SrcTy->isPtrOrPtrVectorTy() ?
+ TD->getIntPtrType(SrcTy) : 0;
+ Type *MidIntPtrTy = TD && MidTy->isPtrOrPtrVectorTy() ?
+ TD->getIntPtrType(MidTy) : 0;
+ Type *DstIntPtrTy = TD && DstTy->isPtrOrPtrVectorTy() ?
+ TD->getIntPtrType(DstTy) : 0;
unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
- DstTy,
- TD ? TD->getIntPtrType(CI->getContext()) : 0);
-
+ DstTy, SrcIntPtrTy, MidIntPtrTy,
+ DstIntPtrTy);
+
// We don't want to form an inttoptr or ptrtoint that converts to an integer
// type that differs from the pointer size.
- if ((Res == Instruction::IntToPtr &&
- (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) ||
- (Res == Instruction::PtrToInt &&
- (!TD || DstTy != TD->getIntPtrType(CI->getContext()))))
+ if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
+ (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
Res = 0;
return Instruction::CastOps(Res);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index bdd310e..7c3f8fe 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -16,7 +16,8 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
@@ -473,7 +474,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
/// If we can't emit an optimized form for this expression, this returns null.
///
static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
- TargetData &TD = *IC.getTargetData();
+ DataLayout &TD = *IC.getDataLayout();
gep_type_iterator GTI = gep_type_begin(GEP);
// Check to see if this gep only has a single variable index. If so, and if
@@ -2355,8 +2356,25 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// Try not to increase register pressure.
BO0->hasOneUse() && BO1->hasOneUse()) {
// Determine Y and Z in the form icmp (X+Y), (X+Z).
- Value *Y = (A == C || A == D) ? B : A;
- Value *Z = (C == A || C == B) ? D : C;
+ Value *Y, *Z;
+ if (A == C) {
+ // C + B == C + D -> B == D
+ Y = B;
+ Z = D;
+ } else if (A == D) {
+ // D + B == C + D -> B == C
+ Y = B;
+ Z = C;
+ } else if (B == C) {
+ // A + C == C + D -> A == D
+ Y = A;
+ Z = D;
+ } else {
+ assert(B == D);
+ // A + D == C + D -> A == C
+ Y = A;
+ Z = C;
+ }
return new ICmpInst(Pred, Y, Z);
}
@@ -2894,10 +2912,6 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
if (!RHSF)
break;
- // We can't convert a PPC double double.
- if (RHSF->getType()->isPPC_FP128Ty())
- break;
-
const fltSemantics *Sem;
// FIXME: This shouldn't be here.
if (LHSExt->getSrcTy()->isHalfTy())
@@ -2910,6 +2924,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
Sem = &APFloat::IEEEquad;
else if (LHSExt->getSrcTy()->isX86_FP80Ty())
Sem = &APFloat::x87DoubleExtended;
+ else if (LHSExt->getSrcTy()->isPPC_FP128Ty())
+ Sem = &APFloat::PPCDoubleDouble;
else
break;
@@ -2985,6 +3001,44 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return Res;
}
break;
+ case Instruction::Call: {
+ CallInst *CI = cast<CallInst>(LHSI);
+ LibFunc::Func Func;
+ // Various optimization for fabs compared with zero.
+ if (RHSC->isNullValue() && CI->getCalledFunction() &&
+ TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
+ TLI->has(Func)) {
+ if (Func == LibFunc::fabs || Func == LibFunc::fabsf ||
+ Func == LibFunc::fabsl) {
+ switch (I.getPredicate()) {
+ default: break;
+ // fabs(x) < 0 --> false
+ case FCmpInst::FCMP_OLT:
+ return ReplaceInstUsesWith(I, Builder->getFalse());
+ // fabs(x) > 0 --> x != 0
+ case FCmpInst::FCMP_OGT:
+ return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) <= 0 --> x == 0
+ case FCmpInst::FCMP_OLE:
+ return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) >= 0 --> !isnan(x)
+ case FCmpInst::FCMP_OGE:
+ return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) == 0 --> x == 0
+ // fabs(x) != 0 --> x != 0
+ case FCmpInst::FCMP_OEQ:
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_ONE:
+ case FCmpInst::FCMP_UNE:
+ return new FCmpInst(I.getPredicate(), CI->getArgOperand(0),
+ RHSC);
+ }
+ }
+ }
+ }
}
}
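Editor's note: the new fabs-vs-zero folds above rest on the fact that fabs never produces a negative value, so only the NaN case can make an ordered compare against zero fail. A tiny source-level illustration (example mine, not from the patch):

    #include <cmath>

    // fabs(x) >= 0.0 is true for every x except NaN, so after the fold this
    // reduces to an ordered ("is not NaN") compare of x against zero.
    bool magnitudeIsNonNegative(double x) {
      return std::fabs(x) >= 0.0;
    }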
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index c485844..4d106fc 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -14,13 +14,161 @@
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
+
+/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
+/// some part of a constant global variable. This intentionally only accepts
+/// constant expressions because we can't rewrite arbitrary instructions.
+static bool pointsToConstantGlobal(Value *V) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return GV->isConstant();
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr)
+ return pointsToConstantGlobal(CE->getOperand(0));
+ return false;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
+/// pointer to an alloca. Ignore any reads of the pointer; return false if we
+/// see any stores or other unknown uses. If we see pointer arithmetic, keep
+/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
+/// the uses. If we see a memcpy/memmove that targets an unoffset pointer to
+/// the alloca, and if the source pointer is a pointer to a constant global, we
+/// can optimize this.
+static bool
+isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
+ SmallVectorImpl<Instruction *> &ToDelete,
+ bool IsOffset = false) {
+ // We track lifetime intrinsics as we encounter them. If we decide to go
+ // ahead and replace the value with the global, this lets the caller quickly
+ // eliminate the markers.
+
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ User *U = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Ignore non-volatile loads; they are always ok.
+ if (!LI->isSimple()) return false;
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ // If uses of the bitcast are ok, we are ok.
+ if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
+ return false;
+ continue;
+ }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // If the GEP has all zero indices, it doesn't offset the pointer;
+ // otherwise it does.
+ if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
+ IsOffset || !GEP->hasAllZeroIndices()))
+ return false;
+ continue;
+ }
+
+ if (CallSite CS = U) {
+ // If this is the function being called then we treat it like a load and
+ // ignore it.
+ if (CS.isCallee(UI))
+ continue;
+
+ // If this is a readonly/readnone call site, then we know it is just a
+ // load (but one that potentially returns the value itself), so we can
+ // ignore it if we know that the value isn't captured.
+ unsigned ArgNo = CS.getArgumentNo(UI);
+ if (CS.onlyReadsMemory() &&
+ (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
+ continue;
+
+ // If this is being passed as a byval argument, the caller is making a
+ // copy, so it is only a read of the alloca.
+ if (CS.isByValArgument(ArgNo))
+ continue;
+ }
+
+ // Lifetime intrinsics can be handled by the caller.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ assert(II->use_empty() && "Lifetime markers have no result to use!");
+ ToDelete.push_back(II);
+ continue;
+ }
+ }
+
+ // If this isn't our memcpy/memmove, reject it as something we can't
+ // handle.
+ MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
+ if (MI == 0)
+ return false;
+
+ // If the transfer is using the alloca as a source of the transfer, then
+ // ignore it since it is a load (unless the transfer is volatile).
+ if (UI.getOperandNo() == 1) {
+ if (MI->isVolatile()) return false;
+ continue;
+ }
+
+ // If we already have seen a copy, reject the second one.
+ if (TheCopy) return false;
+
+ // If the pointer has been offset from the start of the alloca, we can't
+ // safely handle this.
+ if (IsOffset) return false;
+
+ // If the memintrinsic isn't using the alloca as the dest, reject it.
+ if (UI.getOperandNo() != 0) return false;
+
+ // If the source of the memcpy/move is not a constant global, reject it.
+ if (!pointsToConstantGlobal(MI->getSource()))
+ return false;
+
+ // Otherwise, the transform is safe. Remember the copy instruction.
+ TheCopy = MI;
+ }
+ return true;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Return the memcpy/memmove instruction if
+/// the specified alloca is only modified by a copy from a constant global, or
+/// null otherwise. If we can prove this, we can replace any uses of the alloca
+/// with uses of the global directly.
+static MemTransferInst *
+isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+ SmallVectorImpl<Instruction *> &ToDelete) {
+ MemTransferInst *TheCopy = 0;
+ if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
+ return TheCopy;
+ return 0;
+}
+
+/// getPointeeAlignment - Compute the minimum alignment of the value pointed
+/// to by the given pointer.
+static unsigned getPointeeAlignment(Value *V, const DataLayout &TD) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ (CE->getOpcode() == Instruction::GetElementPtr &&
+ cast<GEPOperator>(CE)->hasAllZeroIndices()))
+ return getPointeeAlignment(CE->getOperand(0), TD);
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ if (!GV->isDeclaration())
+ return TD.getPreferredAlignment(GV);
+
+ if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
+ if (PT->getElementType()->isSized())
+ return TD.getABITypeAlignment(PT->getElementType());
+
+ return 0;
+}
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
@@ -99,12 +247,16 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
return &AI;
}
+ // If the alignment of the entry block alloca is 0 (unspecified),
+ // assign it the preferred alignment.
+ if (EntryAI->getAlignment() == 0)
+ EntryAI->setAlignment(
+ TD->getPrefTypeAlignment(EntryAI->getAllocatedType()));
// Replace this zero-sized alloca with the one at the start of the entry
// block after ensuring that the address will be aligned enough for both
// types.
- unsigned MaxAlign =
- std::max(TD->getPrefTypeAlignment(EntryAI->getAllocatedType()),
- TD->getPrefTypeAlignment(AI.getAllocatedType()));
+ unsigned MaxAlign = std::max(EntryAI->getAlignment(),
+ AI.getAlignment());
EntryAI->setAlignment(MaxAlign);
if (AI.getType() != EntryAI->getType())
return new BitCastInst(EntryAI, AI.getType());
@@ -113,6 +265,31 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
}
}
+ if (TD) {
+ // Check to see if this allocation is only modified by a memcpy/memmove from
+ // a constant global whose alignment is equal to or exceeds that of the
+ // allocation. If this is the case, we can change all users to use
+ // the constant global instead. This is commonly produced by the CFE by
+ // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
+ // is only subsequently read.
+ SmallVector<Instruction *, 4> ToDelete;
+ if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
+ if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
+ DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
+ DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
+ for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+ EraseInstFromFunction(*ToDelete[i]);
+ Constant *TheSrc = cast<Constant>(Copy->getSource());
+ Instruction *NewI
+ = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
+ AI.getType()));
+ EraseInstFromFunction(*Copy);
+ ++NumGlobalCopies;
+ return NewI;
+ }
+ }
+ }
+
// At last, use the generic allocation site handler to aggressively remove
// unused allocas.
return visitAllocSite(AI);
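Editor's note: the block above keys off the pattern mentioned in its comment, where a local array is initialized by a memcpy from a constant global that the front end synthesizes. A source-level sketch of that pattern (example mine):

    // The C front end typically lowers the initializer to an alloca plus a
    // memcpy from a private constant global; since A is never stored to
    // afterwards, instcombine can point every read at the global and drop
    // both the alloca and the copy.
    static int sumSquares() {
      const int A[] = {1, 4, 9, 16, 25, 36, 49, 64, 81};
      int S = 0;
      for (int i = 0; i != 9; ++i)
        S += A[i];
      return S;
    }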
@@ -121,7 +298,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
- const TargetData *TD) {
+ const DataLayout *TD) {
User *CI = cast<User>(LI.getOperand(0));
Value *CastOp = CI->getOperand(0);
@@ -151,14 +328,14 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
SrcPTy = SrcTy->getElementType();
}
- if (IC.getTargetData() &&
+ if (IC.getDataLayout() &&
(SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
SrcPTy->isVectorTy()) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
(SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
- IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
- IC.getTargetData()->getTypeSizeInBits(DestPTy)) {
+ IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
+ IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {
// Okay, we are casting from one integer or pointer type to another of
// the same size. Instead of casting the pointer before the load, cast
@@ -336,11 +513,11 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// If the pointers point into different address spaces or if they point to
// values with different sizes, we can't do the transformation.
- if (!IC.getTargetData() ||
+ if (!IC.getDataLayout() ||
SrcTy->getAddressSpace() !=
cast<PointerType>(CI->getType())->getAddressSpace() ||
- IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
- IC.getTargetData()->getTypeSizeInBits(DestPTy))
+ IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
+ IC.getDataLayout()->getTypeSizeInBits(DestPTy))
return 0;
// Okay, we are casting from one integer or pointer type to another of
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 35a0bbb..cefe45e 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -37,7 +37,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))),
m_Value(B))) &&
// The "1" can be any value known to be a power of 2.
- isPowerOfTwo(PowerOf2, IC.getTargetData())) {
+ isPowerOfTwo(PowerOf2, IC.getDataLayout())) {
A = IC.Builder->CreateSub(A, B);
return IC.Builder->CreateShl(PowerOf2, A);
}
@@ -46,7 +46,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// inexact. Similarly for <<.
if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
if (I->isLogicalShift() &&
- isPowerOfTwo(I->getOperand(0), IC.getTargetData())) {
+ isPowerOfTwo(I->getOperand(0), IC.getDataLayout())) {
// We know that this is an exact/nuw shift and that the input is a
// non-zero context as well.
if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) {
@@ -462,12 +462,23 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
}
}
+ // (x lshr C1) udiv C2 --> x udiv (C2 << C1)
+ if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) {
+ Value *X;
+ ConstantInt *C1;
+ if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) {
+ APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1));
+ return BinaryOperator::CreateUDiv(X, Builder->getInt(NC));
+ }
+ }
+
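Editor's note: the new (x lshr C1) udiv C2 fold above relies on an identity of unsigned (floor) division; a quick self-check of it, assuming C2 << C1 does not overflow the type (example mine):

    #include <cassert>
    #include <cstdint>

    int main() {
      // For unsigned x: (x >> C1) / C2 == x / (C2 << C1); here C1 = 2, C2 = 3.
      for (uint32_t x = 0; x != 100000; ++x)
        assert((x >> 2) / 3u == x / (3u << 2));
      return 0;
    }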
// X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
{ const APInt *CI; Value *N;
if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) ||
match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) {
if (*CI != 1)
- N = Builder->CreateAdd(N, ConstantInt::get(I.getType(),CI->logBase2()));
+ N = Builder->CreateAdd(N,
+ ConstantInt::get(N->getType(), CI->logBase2()));
if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1))
N = Builder->CreateZExt(N, Z->getDestTy());
if (I.isExact())
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 664546c..de9c77e 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -13,7 +13,7 @@
#include "InstCombine.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 291e800..a2d4c88 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -287,7 +287,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
/// replaced with RepOp.
static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const TargetData *TD,
+ const DataLayout *TD,
const TargetLibraryInfo *TLI) {
// Trivial replacement.
if (V == Op)
@@ -333,6 +333,10 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// All operands were constants, fold it.
if (ConstOps.size() == I->getNumOperands()) {
+ if (CmpInst *C = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
+ ConstOps[1], TD, TLI);
+
if (LoadInst *LI = dyn_cast<LoadInst>(I))
if (!LI->isVolatile())
return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
@@ -903,7 +907,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
- if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) {
+ if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) {
unsigned VWidth = VecTy->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
@@ -912,6 +916,28 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return ReplaceInstUsesWith(SI, V);
return &SI;
}
+
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) {
+ // Form a shufflevector instruction.
+ SmallVector<Constant *, 8> Mask(VWidth);
+ Type *Int32Ty = Type::getInt32Ty(CV->getContext());
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Constant *Elem = cast<Constant>(CV->getOperand(i));
+ if (ConstantInt *E = dyn_cast<ConstantInt>(Elem))
+ Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? VWidth : 0));
+ else if (isa<UndefValue>(Elem))
+ Mask[i] = UndefValue::get(Int32Ty);
+ else
+ return 0;
+ }
+ Constant *MaskVal = ConstantVector::get(Mask);
+ Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal);
+ return ReplaceInstUsesWith(SI, V);
+ }
+
+ if (isa<ConstantAggregateZero>(CondVal)) {
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
}
return 0;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 4bb2403..57021f1 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -190,7 +190,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
V = IC.Builder->CreateLShr(C, NumBits);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- V = ConstantFoldConstantExpression(CE, IC.getTargetData(),
+ V = ConstantFoldConstantExpression(CE, IC.getDataLayout(),
IC.getTargetLibraryInfo());
return V;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 54be8ed..602b203 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -14,7 +14,7 @@
#include "InstCombine.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/IntrinsicInst.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index cf60f0f..dd7ea14 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -636,8 +636,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// If LHS's width is changed, shift the mask value accordingly.
// If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any
- // references to RHSOp0 to LHSOp0, so we don't need to shift the mask.
- if (eltMask >= 0 && newRHS != NULL)
+ // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
+ // If newRHS == newLHS, we want to remap any references from newRHS to
+ // newLHS so that we can properly identify splats that may occur due to
+ // obfuscation across the two vectors.
+ if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS)
eltMask += newLHSWidth;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
index 99a02fc..ea654ae 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -26,8 +26,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombineWorklist {
SmallVector<Instruction*, 256> Worklist;
DenseMap<Instruction*, unsigned> WorklistMap;
- void operator=(const InstCombineWorklist&RHS); // DO NOT IMPLEMENT
- InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT
+ void operator=(const InstCombineWorklist&RHS) LLVM_DELETED_FUNCTION;
+ InstCombineWorklist(const InstCombineWorklist&) LLVM_DELETED_FUNCTION;
public:
InstCombineWorklist() {}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 68ecd51..9a46f25 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -40,7 +40,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
@@ -88,7 +88,7 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
Value *InstCombiner::EmitGEPOffset(User *GEP) {
- return llvm::EmitGEPOffset(Builder, *getTargetData(), GEP);
+ return llvm::EmitGEPOffset(Builder, *getDataLayout(), GEP);
}
/// ShouldChangeType - Return true if it is desirable to convert a computation
@@ -805,6 +805,244 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
return true;
}
+/// Descale - Return a value X such that Val = X * Scale, or null if none. If
+/// the multiplication is known not to overflow then NoSignedWrap is set.
+Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
+ assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
+ assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
+ Scale.getBitWidth() && "Scale not compatible with value!");
+
+ // If Val is zero or Scale is one then Val = Val * Scale.
+ if (match(Val, m_Zero()) || Scale == 1) {
+ NoSignedWrap = true;
+ return Val;
+ }
+
+ // If Scale is zero then it does not divide Val.
+ if (Scale.isMinValue())
+ return 0;
+
+ // Look through chains of multiplications, searching for a constant that is
+ // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
+ // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8) by
+ // a factor of 4 will produce X*(Y*2). The principle of operation is to bore
+ // down from Val:
+ //
+ // Val = M1 * X || Analysis starts here and works down
+ // M1 = M2 * Y || Doesn't descend into terms with more
+ // M2 = Z * 4 \/ than one use
+ //
+ // Then to modify a term at the bottom:
+ //
+ // Val = M1 * X
+ // M1 = Z * Y || Replaced M2 with Z
+ //
+ // Then to work back up correcting nsw flags.
+
+ // Op - the term we are currently analyzing. Starts at Val then drills down.
+ // Replaced with its descaled value before exiting from the drill down loop.
+ Value *Op = Val;
+
+ // Parent - initially null, but after drilling down notes where Op came from.
+ // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the
+ // 0'th operand of Val.
+ std::pair<Instruction*, unsigned> Parent;
+
+ // RequireNoSignedWrap - Set if the transform requires a descaling at deeper
+ // levels that doesn't overflow.
+ bool RequireNoSignedWrap = false;
+
+ // logScale - log base 2 of the scale. Negative if not a power of 2.
+ int32_t logScale = Scale.exactLogBase2();
+
+ for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // If Op is a constant divisible by Scale then descale to the quotient.
+ APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
+ APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
+ if (!Remainder.isMinValue())
+ // Not divisible by Scale.
+ return 0;
+ // Replace with the quotient in the parent.
+ Op = ConstantInt::get(CI->getType(), Quotient);
+ NoSignedWrap = true;
+ break;
+ }
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) {
+
+ if (BO->getOpcode() == Instruction::Mul) {
+ // Multiplication.
+ NoSignedWrap = BO->hasNoSignedWrap();
+ if (RequireNoSignedWrap && !NoSignedWrap)
+ return 0;
+
+ // There are three cases for multiplication: multiplication by exactly
+ // the scale, multiplication by a constant different to the scale, and
+ // multiplication by something else.
+ Value *LHS = BO->getOperand(0);
+ Value *RHS = BO->getOperand(1);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Multiplication by a constant.
+ if (CI->getValue() == Scale) {
+ // Multiplication by exactly the scale, replace the multiplication
+ // by its left-hand side in the parent.
+ Op = LHS;
+ break;
+ }
+
+ // Otherwise drill down into the constant.
+ if (!Op->hasOneUse())
+ return 0;
+
+ Parent = std::make_pair(BO, 1);
+ continue;
+ }
+
+ // Multiplication by something else. Drill down into the left-hand side
+ // since that's where the reassociate pass puts the good stuff.
+ if (!Op->hasOneUse())
+ return 0;
+
+ Parent = std::make_pair(BO, 0);
+ continue;
+ }
+
+ if (logScale > 0 && BO->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(BO->getOperand(1))) {
+ // Multiplication by a power of 2.
+ NoSignedWrap = BO->hasNoSignedWrap();
+ if (RequireNoSignedWrap && !NoSignedWrap)
+ return 0;
+
+ Value *LHS = BO->getOperand(0);
+ int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
+ getLimitedValue(Scale.getBitWidth());
+ // Op = LHS << Amt.
+
+ if (Amt == logScale) {
+ // Multiplication by exactly the scale, replace the multiplication
+ // by its left-hand side in the parent.
+ Op = LHS;
+ break;
+ }
+ if (Amt < logScale || !Op->hasOneUse())
+ return 0;
+
+ // Multiplication by more than the scale. Reduce the multiplying amount
+ // by the scale in the parent.
+ Parent = std::make_pair(BO, 1);
+ Op = ConstantInt::get(BO->getType(), Amt - logScale);
+ break;
+ }
+ }
+
+ if (!Op->hasOneUse())
+ return 0;
+
+ if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
+ if (Cast->getOpcode() == Instruction::SExt) {
+ // Op is sign-extended from a smaller type, descale in the smaller type.
+ unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
+ APInt SmallScale = Scale.trunc(SmallSize);
+ // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to
+ // descale Op as (sext Y) * Scale. In order to have
+ // sext (Y * SmallScale) = (sext Y) * Scale
+ // some conditions need to hold however: SmallScale must sign-extend to
+ // Scale and the multiplication Y * SmallScale should not overflow.
+ if (SmallScale.sext(Scale.getBitWidth()) != Scale)
+ // SmallScale does not sign-extend to Scale.
+ return 0;
+ assert(SmallScale.exactLogBase2() == logScale);
+ // Require that Y * SmallScale must not overflow.
+ RequireNoSignedWrap = true;
+
+ // Drill down through the cast.
+ Parent = std::make_pair(Cast, 0);
+ Scale = SmallScale;
+ continue;
+ }
+
+ if (Cast->getOpcode() == Instruction::Trunc) {
+ // Op is truncated from a larger type, descale in the larger type.
+ // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then
+ // trunc (Y * sext Scale) = (trunc Y) * Scale
+ // always holds. However (trunc Y) * Scale may overflow even if
+ // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
+ // from this point up in the expression (see later).
+ if (RequireNoSignedWrap)
+ return 0;
+
+ // Drill down through the cast.
+ unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
+ Parent = std::make_pair(Cast, 0);
+ Scale = Scale.sext(LargeSize);
+ if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits())
+ logScale = -1;
+ assert(Scale.exactLogBase2() == logScale);
+ continue;
+ }
+ }
+
+ // Unsupported expression, bail out.
+ return 0;
+ }
+
+ // We know that we can successfully descale, so from here on we can safely
+ // modify the IR. Op holds the descaled version of the deepest term in the
+ // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known
+ // not to overflow.
+
+ if (!Parent.first)
+ // The expression only had one term.
+ return Op;
+
+ // Rewrite the parent using the descaled version of its operand.
+ assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
+ assert(Op != Parent.first->getOperand(Parent.second) &&
+ "Descaling was a no-op?");
+ Parent.first->setOperand(Parent.second, Op);
+ Worklist.Add(Parent.first);
+
+ // Now work back up the expression correcting nsw flags. The logic is based
+ // on the following observation: if X * Y is known not to overflow as a signed
+ // multiplication, and Y is replaced by a value Z with smaller absolute value,
+ // then X * Z will not overflow as a signed multiplication either. As we work
+ // our way up, having NoSignedWrap 'true' means that the descaled value at the
+ // current level has strictly smaller absolute value than the original.
+ Instruction *Ancestor = Parent.first;
+ do {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) {
+ // If the multiplication wasn't nsw then we can't say anything about the
+ // value of the descaled multiplication, and we have to clear nsw flags
+ // from this point on up.
+ bool OpNoSignedWrap = BO->hasNoSignedWrap();
+ NoSignedWrap &= OpNoSignedWrap;
+ if (NoSignedWrap != OpNoSignedWrap) {
+ BO->setHasNoSignedWrap(NoSignedWrap);
+ Worklist.Add(Ancestor);
+ }
+ } else if (Ancestor->getOpcode() == Instruction::Trunc) {
+ // The fact that the descaled input to the trunc has smaller absolute
+ // value than the original input doesn't tell us anything useful about
+ // the absolute values of the truncations.
+ NoSignedWrap = false;
+ }
+ assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) &&
+ "Failed to keep proper track of nsw flags while drilling down?");
+
+ if (Ancestor == Val)
+ // Got to the top, all done!
+ return Val;
+
+ // Move up one level in the expression.
+ assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
+ Ancestor = Ancestor->use_back();
+ } while (1);
+}
+
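A minimal before/after sketch of the rewrite that Descale enables in the GEP combine below (values and names are illustrative; the i8/i32 pattern mirrors the comment in visitGetElementPtrInst):

    ; before: the i8 GEP index is a multiple of the i32 element size
    %V  = mul nsw i64 %N, 4
    %p8 = bitcast i32* %arr to i8*
    %t  = getelementptr i8* %p8, i64 %V
    ; after: Descale(%V, 4) returns %N, so the GEP is rebuilt on i32 and the
    ; result is bitcast back; "inbounds" is kept only when the multiply was nsw
    %t1 = getelementptr inbounds i32* %arr, i64 %N
    %t  = bitcast i32* %t1 to i8*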
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
@@ -817,7 +1055,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// by multiples of a zero size type with zero.
if (TD) {
bool MadeChange = false;
- Type *IntPtrTy = TD->getIntPtrType(GEP.getContext());
+ Type *IntPtrTy = TD->getIntPtrType(GEP.getPointerOperandType());
gep_type_iterator GTI = gep_type_begin(GEP);
for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
@@ -836,7 +1074,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
Type *IndexTy = (*I)->getType();
- if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) {
+ if (IndexTy != IntPtrTy) {
// If we are using a wider index than needed for this platform, shrink
// it to what we need. If narrower, sign-extend it to what we need.
// This explicit cast can make subsequent optimizations more obvious.
@@ -855,7 +1093,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
return 0;
- // Note that if our source is a gep chain itself that we wait for that
+ // Note that if our source is a gep chain itself then we wait for that
// chain to be resolved before we perform this transformation. This
// avoids us creating a TON of code in some cases.
if (GEPOperator *SrcGEP =
@@ -987,63 +1225,74 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
// Transform things like:
+ // %V = mul i64 %N, 4
+ // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
+ // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
+ if (TD && ResElTy->isSized() && SrcElTy->isSized()) {
+ // Check that changing the type amounts to dividing the index by a scale
+ // factor.
+ uint64_t ResSize = TD->getTypeAllocSize(ResElTy);
+ uint64_t SrcSize = TD->getTypeAllocSize(SrcElTy);
+ if (ResSize && SrcSize % ResSize == 0) {
+ Value *Idx = GEP.getOperand(1);
+ unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
+ uint64_t Scale = SrcSize / ResSize;
+
+ // Earlier transforms ensure that the index has type IntPtrType, which
+ // considerably simplifies the logic by eliminating implicit casts.
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ "Index not cast to pointer width?");
+
+ bool NSW;
+ if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
+ // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
+ // If the multiplication NewIdx * Scale may overflow then the new
+ // GEP may not be "inbounds".
+ Value *NewGEP = GEP.isInBounds() && NSW ?
+ Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName());
+ // The NewGEP must be pointer typed, as must the old one, so reconcile them with a BitCast.
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+ }
+ }
+
+ // Similarly, transform things like:
// getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
// (where tmp = 8*tmp2) into:
// getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
-
- if (TD && SrcElTy->isArrayTy() && ResElTy->isIntegerTy(8)) {
+ if (TD && ResElTy->isSized() && SrcElTy->isSized() &&
+ SrcElTy->isArrayTy()) {
+ // Check that changing to the array element type amounts to dividing the
+ // index by a scale factor.
+ uint64_t ResSize = TD->getTypeAllocSize(ResElTy);
uint64_t ArrayEltSize =
- TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
-
- // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We
- // allow either a mul, shift, or constant here.
- Value *NewIdx = 0;
- ConstantInt *Scale = 0;
- if (ArrayEltSize == 1) {
- NewIdx = GEP.getOperand(1);
- Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
- } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
- NewIdx = ConstantInt::get(CI->getType(), 1);
- Scale = CI;
- } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
- if (Inst->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(Inst->getOperand(1))) {
- ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
- uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
- Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
- 1ULL << ShAmtVal);
- NewIdx = Inst->getOperand(0);
- } else if (Inst->getOpcode() == Instruction::Mul &&
- isa<ConstantInt>(Inst->getOperand(1))) {
- Scale = cast<ConstantInt>(Inst->getOperand(1));
- NewIdx = Inst->getOperand(0);
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+ if (ResSize && ArrayEltSize % ResSize == 0) {
+ Value *Idx = GEP.getOperand(1);
+ unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
+ uint64_t Scale = ArrayEltSize / ResSize;
+
+ // Earlier transforms ensure that the index has type IntPtrType, which
+ // considerably simplifies the logic by eliminating implicit casts.
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ "Index not cast to pointer width?");
+
+ bool NSW;
+ if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
+ // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
+ // If the multiplication NewIdx * Scale may overflow then the new
+ // GEP may not be "inbounds".
+ Value *Off[2];
+ Off[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+ Off[1] = NewIdx;
+ Value *NewGEP = GEP.isInBounds() && NSW ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
+ // The NewGEP must be pointer typed, as must the old one, so reconcile them with a BitCast.
+ return new BitCastInst(NewGEP, GEP.getType());
}
}
-
- // If the index will be to exactly the right offset with the scale taken
- // out, perform the transformation. Note, we don't know whether Scale is
- // signed or not. We'll use unsigned version of division/modulo
- // operation after making sure Scale doesn't have the sign bit set.
- if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
- Scale->getZExtValue() % ArrayEltSize == 0) {
- Scale = ConstantInt::get(Scale->getType(),
- Scale->getZExtValue() / ArrayEltSize);
- if (Scale->getZExtValue() != 1) {
- Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
- false /*ZExt*/);
- NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
- }
-
- // Insert the new GEP instruction.
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
- Idx[1] = NewIdx;
- Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()):
- Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
- // The NewGEP must be pointer typed, so must the old one -> BitCast
- return new BitCastInst(NewGEP, GEP.getType());
- }
}
}
}
@@ -1068,7 +1317,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// If the bitcast is of an allocation, and the allocation will be
// converted to match the type of the cast, don't touch this.
if (isa<AllocaInst>(BCI->getOperand(0)) ||
- isAllocationFn(BCI->getOperand(0))) {
+ isAllocationFn(BCI->getOperand(0), TLI)) {
// See if the bitcast simplifies, if so, don't nuke this GEP yet.
if (Instruction *I = visitBitCast(*BCI)) {
if (I != BCI) {
@@ -1107,7 +1356,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
static bool
-isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) {
+isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
+ const TargetLibraryInfo *TLI) {
SmallVector<Instruction*, 4> Worklist;
Worklist.push_back(AI);
@@ -1163,7 +1413,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) {
}
}
- if (isFreeCall(I)) {
+ if (isFreeCall(I, TLI)) {
Users.push_back(I);
continue;
}
@@ -1188,7 +1438,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
// to null and free calls, delete the calls and replace the comparisons with
// true or false as appropriate.
SmallVector<WeakVH, 64> Users;
- if (isAllocSiteRemovable(&MI, Users)) {
+ if (isAllocSiteRemovable(&MI, Users, TLI)) {
for (unsigned i = 0, e = Users.size(); i != e; ++i) {
Instruction *I = cast_or_null<Instruction>(&*Users[i]);
if (!I) continue;
@@ -1853,7 +2103,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
static bool AddReachableCodeToWorklist(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 64> &Visited,
InstCombiner &IC,
- const TargetData *TD,
+ const DataLayout *TD,
const TargetLibraryInfo *TLI) {
bool MadeIRChange = false;
SmallVector<BasicBlock*, 256> Worklist;
@@ -1872,7 +2122,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
Instruction *Inst = BBI++;
// DCE instruction if trivially dead.
- if (isInstructionTriviallyDead(Inst)) {
+ if (isInstructionTriviallyDead(Inst, TLI)) {
++NumDeadInst;
DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
Inst->eraseFromParent();
@@ -2002,7 +2252,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
if (I == 0) continue; // skip null values.
// Check to see if we can DCE the instruction.
- if (isInstructionTriviallyDead(I)) {
+ if (isInstructionTriviallyDead(I, TLI)) {
DEBUG(errs() << "IC: DCE: " << *I << '\n');
EraseInstFromFunction(*I);
++NumDeadInst;
@@ -2102,7 +2352,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// If the instruction was modified, it's possible that it is now dead.
// if so, remove it.
- if (isInstructionTriviallyDead(I)) {
+ if (isInstructionTriviallyDead(I, TLI)) {
EraseInstFromFunction(*I);
} else {
Worklist.Add(I);
@@ -2117,9 +2367,27 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
return MadeIRChange;
}
+namespace {
+class InstCombinerLibCallSimplifier : public LibCallSimplifier {
+ InstCombiner *IC;
+public:
+ InstCombinerLibCallSimplifier(const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ InstCombiner *IC)
+ : LibCallSimplifier(TD, TLI) {
+ this->IC = IC;
+ }
+
+ /// replaceAllUsesWith - override so that instruction replacement
+ /// can be defined in terms of the instruction combiner framework.
+ virtual void replaceAllUsesWith(Instruction *I, Value *With) const {
+ IC->ReplaceInstUsesWith(*I, With);
+ }
+};
+}
bool InstCombiner::runOnFunction(Function &F) {
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
TLI = &getAnalysis<TargetLibraryInfo>();
/// Builder - This is an IRBuilder that automatically inserts new
@@ -2129,6 +2397,9 @@ bool InstCombiner::runOnFunction(Function &F) {
InstCombineIRInserter(Worklist));
Builder = &TheBuilder;
+ InstCombinerLibCallSimplifier TheSimplifier(TD, TLI, this);
+ Simplifier = &TheSimplifier;
+
bool EverMadeChange = false;
// Lower dbg.declare intrinsics otherwise their value may be clobbered
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 17b83ce..b7be462 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -15,7 +15,7 @@
#define DEBUG_TYPE "asan"
-#include "FunctionBlackList.h"
+#include "BlackList.h"
#include "llvm/Function.h"
#include "llvm/IRBuilder.h"
#include "llvm/InlineAsm.h"
@@ -35,7 +35,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -61,6 +61,8 @@ static const int kAsanCtorAndCtorPriority = 1;
static const char *kAsanReportErrorTemplate = "__asan_report_";
static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
+static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
+static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
static const char *kAsanInitName = "__asan_init";
static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
@@ -106,6 +108,8 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
// This flag may need to be replaced with -f[no]asan-globals.
static cl::opt<bool> ClGlobals("asan-globals",
cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInitializers("asan-initialization-order",
+ cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
static cl::opt<bool> ClMemIntrin("asan-memintrin",
cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
// This flag may need to be replaced with -fasan-blacklist.
@@ -144,41 +148,33 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
cl::Hidden, cl::init(-1));
namespace {
-
-/// An object of this type is created while instrumenting every function.
-struct AsanFunctionContext {
- AsanFunctionContext(Function &Function) : F(Function) { }
-
- Function &F;
-};
-
/// AddressSanitizer: instrument the code in module to find memory bugs.
-struct AddressSanitizer : public ModulePass {
+struct AddressSanitizer : public FunctionPass {
AddressSanitizer();
virtual const char *getPassName() const;
- void instrumentMop(AsanFunctionContext &AFC, Instruction *I);
- void instrumentAddress(AsanFunctionContext &AFC,
- Instruction *OrigIns, IRBuilder<> &IRB,
+ void instrumentMop(Instruction *I);
+ void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB,
Value *Addr, uint32_t TypeSize, bool IsWrite);
Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue, uint32_t TypeSize);
Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex);
- bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI);
- void instrumentMemIntrinsicParam(AsanFunctionContext &AFC,
- Instruction *OrigIns, Value *Addr,
+ bool instrumentMemIntrinsic(MemIntrinsic *MI);
+ void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,
Value *Size,
Instruction *InsertBefore, bool IsWrite);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
- bool handleFunction(Module &M, Function &F);
+ bool runOnFunction(Function &F);
+ void createInitializerPoisonCalls(Module &M,
+ Value *FirstAddr, Value *LastAddr);
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
- bool poisonStackInFunction(Module &M, Function &F);
- virtual bool runOnModule(Module &M);
+ bool poisonStackInFunction(Function &F);
+ virtual bool doInitialization(Module &M);
+ virtual bool doFinalization(Module &M);
bool insertGlobalRedzones(Module &M);
static char ID; // Pass identification, replacement for typeid
private:
-
uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
Type *Ty = AI->getAllocatedType();
uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
@@ -194,12 +190,15 @@ struct AddressSanitizer : public ModulePass {
}
Function *checkInterfaceFunction(Constant *FuncOrBitcast);
+ bool ShouldInstrumentGlobal(GlobalVariable *G);
void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
Value *ShadowBase, bool DoPoison);
bool LooksLikeCodeInBug11395(Instruction *I);
+ void FindDynamicInitializers(Module &M);
+ bool HasDynamicInitializer(GlobalVariable *G);
LLVMContext *C;
- TargetData *TD;
+ DataLayout *TD;
uint64_t MappingOffset;
int MappingScale;
size_t RedzoneSize;
@@ -208,11 +207,15 @@ struct AddressSanitizer : public ModulePass {
Type *IntptrPtrTy;
Function *AsanCtorFunction;
Function *AsanInitFunction;
+ Function *AsanStackMallocFunc, *AsanStackFreeFunc;
+ Function *AsanHandleNoReturnFunc;
Instruction *CtorInsertBefore;
- OwningPtr<FunctionBlackList> BL;
+ OwningPtr<BlackList> BL;
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
InlineAsm *EmptyAsm;
+ SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals;
+ SmallSet<GlobalValue*, 32> GlobalsCreatedByAsan;
};
} // namespace
@@ -221,8 +224,8 @@ char AddressSanitizer::ID = 0;
INITIALIZE_PASS(AddressSanitizer, "asan",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
false, false)
-AddressSanitizer::AddressSanitizer() : ModulePass(ID) { }
-ModulePass *llvm::createAddressSanitizerPass() {
+AddressSanitizer::AddressSanitizer() : FunctionPass(ID) { }
+FunctionPass *llvm::createAddressSanitizerPass() {
return new AddressSanitizer();
}
@@ -243,38 +246,6 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
GlobalValue::PrivateLinkage, StrConst, "");
}
-// Split the basic block and insert an if-then code.
-// Before:
-// Head
-// Cmp
-// Tail
-// After:
-// Head
-// if (Cmp)
-// ThenBlock
-// Tail
-//
-// ThenBlock block is created and its terminator is returned.
-// If Unreachable, ThenBlock is terminated with UnreachableInst, otherwise
-// it is terminated with BranchInst to Tail.
-static TerminatorInst *splitBlockAndInsertIfThen(Value *Cmp, bool Unreachable) {
- Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode();
- BasicBlock *Head = SplitBefore->getParent();
- BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
- TerminatorInst *HeadOldTerm = Head->getTerminator();
- LLVMContext &C = Head->getParent()->getParent()->getContext();
- BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
- TerminatorInst *CheckTerm;
- if (Unreachable)
- CheckTerm = new UnreachableInst(C, ThenBlock);
- else
- CheckTerm = BranchInst::Create(Tail, ThenBlock);
- BranchInst *HeadNewTerm =
- BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
- ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
- return CheckTerm;
-}
-
Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
// Shadow >> scale
Shadow = IRB.CreateLShr(Shadow, MappingScale);
@@ -286,12 +257,12 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
}
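As a worked example of the mapping computed here (assuming the pass's usual defaults of MappingScale = 3 and a 32-bit MappingOffset of 1 << 29, neither of which appears in this hunk): Shadow(Addr) = (Addr >> 3) + 0x20000000, so an access at 0x4f000010 is checked through the shadow byte at (0x4f000010 >> 3) + 0x20000000 = 0x29e00002.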
void AddressSanitizer::instrumentMemIntrinsicParam(
- AsanFunctionContext &AFC, Instruction *OrigIns,
+ Instruction *OrigIns,
Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
// Check the first byte.
{
IRBuilder<> IRB(InsertBefore);
- instrumentAddress(AFC, OrigIns, IRB, Addr, 8, IsWrite);
+ instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite);
}
// Check the last byte.
{
@@ -301,13 +272,12 @@ void AddressSanitizer::instrumentMemIntrinsicParam(
SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false);
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne);
- instrumentAddress(AFC, OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite);
+ instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite);
}
}
// Instrument memset/memmove/memcpy
-bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,
- MemIntrinsic *MI) {
+bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
Value *Dst = MI->getDest();
MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);
Value *Src = MemTran ? MemTran->getSource() : 0;
@@ -323,12 +293,12 @@ bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,
Value *Cmp = IRB.CreateICmpNE(Length,
Constant::getNullValue(Length->getType()));
- InsertBefore = splitBlockAndInsertIfThen(Cmp, false);
+ InsertBefore = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
}
- instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true);
+ instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
if (Src)
- instrumentMemIntrinsicParam(AFC, MI, Src, Length, InsertBefore, false);
+ instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false);
return true;
}
@@ -358,14 +328,50 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
return NULL;
}
-void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {
- bool IsWrite;
+void AddressSanitizer::FindDynamicInitializers(Module& M) {
+ // Clang generates metadata identifying all dynamically initialized globals.
+ NamedMDNode *DynamicGlobals =
+ M.getNamedMetadata("llvm.asan.dynamically_initialized_globals");
+ if (!DynamicGlobals)
+ return;
+ for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) {
+ MDNode *MDN = DynamicGlobals->getOperand(i);
+ assert(MDN->getNumOperands() == 1);
+ Value *VG = MDN->getOperand(0);
+ // The optimizer may optimize away a global entirely, in which case we
+ // cannot instrument access to it.
+ if (!VG)
+ continue;
+
+ GlobalVariable *G = cast<GlobalVariable>(VG);
+ DynamicallyInitializedGlobals.insert(G);
+ }
+}
+// Returns true if a global variable is initialized dynamically in this TU.
+bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) {
+ return DynamicallyInitializedGlobals.count(G);
+}
+
+void AddressSanitizer::instrumentMop(Instruction *I) {
+ bool IsWrite = false;
Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
assert(Addr);
- if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) {
- // We are accessing a global scalar variable. Nothing to catch here.
- return;
+ if (ClOpt && ClOptGlobals) {
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
+ // If initialization order checking is disabled, a simple access to a
+ // dynamically initialized global is always valid.
+ if (!ClInitializers)
+ return;
+ // If a global variable does not have dynamic initialization we don't
+ // have to instrument it. However, if a global has external linkage, we
+ // assume it has dynamic initialization, as it may have an initializer
+ // in a different TU.
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
+ !HasDynamicInitializer(G))
+ return;
+ }
}
+
Type *OrigPtrTy = Addr->getType();
Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
@@ -379,7 +385,7 @@ void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {
}
IRBuilder<> IRB(I);
- instrumentAddress(AFC, I, IRB, Addr, TypeSize, IsWrite);
+ instrumentAddress(I, IRB, Addr, TypeSize, IsWrite);
}
// Validate the result of Module::getOrInsertFunction called for an interface
@@ -424,8 +430,7 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}
-void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
- Instruction *OrigIns,
+void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
IRBuilder<> &IRB, Value *Addr,
uint32_t TypeSize, bool IsWrite) {
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
@@ -444,17 +449,19 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
TerminatorInst *CrashTerm = 0;
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
- TerminatorInst *CheckTerm = splitBlockAndInsertIfThen(Cmp, false);
+ TerminatorInst *CheckTerm =
+ SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional());
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
- BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB);
+ BasicBlock *CrashBlock =
+ BasicBlock::Create(*C, "", NextBB->getParent(), NextBB);
CrashTerm = new UnreachableInst(*C, CrashBlock);
BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
ReplaceInstWithInst(CheckTerm, NewTerm);
} else {
- CrashTerm = splitBlockAndInsertIfThen(Cmp, true);
+ CrashTerm = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), true);
}
Instruction *Crash =
@@ -462,68 +469,108 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
Crash->setDebugLoc(OrigIns->getDebugLoc());
}
+void AddressSanitizer::createInitializerPoisonCalls(Module &M,
+ Value *FirstAddr,
+ Value *LastAddr) {
+ // We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
+ Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
+ // If that function is not present, this TU contains no dynamically
+ // initialized globals, or they have all been optimized away.
+ if (!GlobalInit)
+ return;
+
+ // Set up the arguments to our poison/unpoison functions.
+ IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt());
+
+ // Declare our poisoning and unpoisoning functions.
+ Function *AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
+ Function *AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL));
+ AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
+
+ // Add a call to poison all external globals before the given function starts.
+ IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr);
+
+ // Add calls to unpoison all globals before each return instruction.
+ for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
+ I != E; ++I) {
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) {
+ CallInst::Create(AsanUnpoisonGlobals, "", RI);
+ }
+ }
+}
+
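Put together, the instrumented module constructor ends up bracketed roughly like this (a sketch for a 64-bit target; the two i64 operands stand for the addresses of the first and last registered dynamically initialized globals):

    define internal void @_GLOBAL__I_a() {
      call void @__asan_before_dynamic_init(i64 %first_g, i64 %last_g)
      ; ... original dynamic initializers ...
      call void @__asan_after_dynamic_init()
      ret void
    }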
+bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
+ Type *Ty = cast<PointerType>(G->getType())->getElementType();
+ DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
+
+ if (BL->isIn(*G)) return false;
+ if (!Ty->isSized()) return false;
+ if (!G->hasInitializer()) return false;
+ if (GlobalsCreatedByAsan.count(G)) return false; // Our own global.
+ // Touch only those globals that will not be defined in other modules.
+ // Don't handle ODR type linkages since other modules may be built w/o asan.
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
+ G->getLinkage() != GlobalVariable::PrivateLinkage &&
+ G->getLinkage() != GlobalVariable::InternalLinkage)
+ return false;
+ // Two problems with thread-locals:
+ // - The address of the main thread's copy can't be computed at link-time.
+ // - Need to poison all copies, not just the main thread's one.
+ if (G->isThreadLocal())
+ return false;
+ // For now, just ignore this global if the alignment is large.
+ if (G->getAlignment() > RedzoneSize) return false;
+
+ // Ignore all the globals with the names starting with "\01L_OBJC_".
+ // Many of those are put into the .cstring section. The linker compresses
+ // that section by removing the spare \0s after the string terminator, so
+ // our redzones get broken.
+ if ((G->getName().find("\01L_OBJC_") == 0) ||
+ (G->getName().find("\01l_OBJC_") == 0)) {
+ DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
+ return false;
+ }
+
+ if (G->hasSection()) {
+ StringRef Section(G->getSection());
+ // Ignore the globals from the __OBJC section. The ObjC runtime assumes
+ // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
+ // them.
+ if ((Section.find("__OBJC,") == 0) ||
+ (Section.find("__DATA, __objc_") == 0)) {
+ DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
+ return false;
+ }
+ // See http://code.google.com/p/address-sanitizer/issues/detail?id=32
+ // Constant CFString instances are compiled in the following way:
+ // -- the string buffer is emitted into
+ // __TEXT,__cstring,cstring_literals
+ // -- the constant NSConstantString structure referencing that buffer
+ // is placed into __DATA,__cfstring
+ // Therefore there's no point in placing redzones into __DATA,__cfstring.
+ // Moreover, it causes the linker to crash on OS X 10.7
+ if (Section.find("__DATA,__cfstring") == 0) {
+ DEBUG(dbgs() << "Ignoring CFString: " << *G);
+ return false;
+ }
+ }
+
+ return true;
+}
+
// This function replaces all global variables with new variables that have
// trailing redzones. It also creates a function that poisons
// redzones and inserts this function into llvm.global_ctors.
bool AddressSanitizer::insertGlobalRedzones(Module &M) {
SmallVector<GlobalVariable *, 16> GlobalsToChange;
- for (Module::GlobalListType::iterator G = M.getGlobalList().begin(),
- E = M.getGlobalList().end(); G != E; ++G) {
- Type *Ty = cast<PointerType>(G->getType())->getElementType();
- DEBUG(dbgs() << "GLOBAL: " << *G);
-
- if (!Ty->isSized()) continue;
- if (!G->hasInitializer()) continue;
- // Touch only those globals that will not be defined in other modules.
- // Don't handle ODR type linkages since other modules may be built w/o asan.
- if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
- G->getLinkage() != GlobalVariable::PrivateLinkage &&
- G->getLinkage() != GlobalVariable::InternalLinkage)
- continue;
- // Two problems with thread-locals:
- // - The address of the main thread's copy can't be computed at link-time.
- // - Need to poison all copies, not just the main thread's one.
- if (G->isThreadLocal())
- continue;
- // For now, just ignore this Alloca if the alignment is large.
- if (G->getAlignment() > RedzoneSize) continue;
-
- // Ignore all the globals with the names starting with "\01L_OBJC_".
- // Many of those are put into the .cstring section. The linker compresses
- // that section by removing the spare \0s after the string terminator, so
- // our redzones get broken.
- if ((G->getName().find("\01L_OBJC_") == 0) ||
- (G->getName().find("\01l_OBJC_") == 0)) {
- DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
- continue;
- }
-
- if (G->hasSection()) {
- StringRef Section(G->getSection());
- // Ignore the globals from the __OBJC section. The ObjC runtime assumes
- // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
- // them.
- if ((Section.find("__OBJC,") == 0) ||
- (Section.find("__DATA, __objc_") == 0)) {
- DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
- continue;
- }
- // See http://code.google.com/p/address-sanitizer/issues/detail?id=32
- // Constant CFString instances are compiled in the following way:
- // -- the string buffer is emitted into
- // __TEXT,__cstring,cstring_literals
- // -- the constant NSConstantString structure referencing that buffer
- // is placed into __DATA,__cfstring
- // Therefore there's no point in placing redzones into __DATA,__cfstring.
- // Moreover, it causes the linker to crash on OS X 10.7
- if (Section.find("__DATA,__cfstring") == 0) {
- DEBUG(dbgs() << "Ignoring CFString: " << *G);
- continue;
- }
- }
-
- GlobalsToChange.push_back(G);
+ for (Module::GlobalListType::iterator G = M.global_begin(),
+ E = M.global_end(); G != E; ++G) {
+ if (ShouldInstrumentGlobal(G))
+ GlobalsToChange.push_back(G);
}
size_t n = GlobalsToChange.size();
@@ -534,13 +581,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
// size_t size;
// size_t size_with_redzone;
// const char *name;
+ // size_t has_dynamic_init;
// We initialize an array of such structures and pass it to a run-time call.
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
- IntptrTy, IntptrTy, NULL);
- SmallVector<Constant *, 16> Initializers(n);
+ IntptrTy, IntptrTy,
+ IntptrTy, NULL);
+ SmallVector<Constant *, 16> Initializers(n), DynamicInit;
IRBuilder<> IRB(CtorInsertBefore);
+ if (ClInitializers)
+ FindDynamicInitializers(M);
+
+ // The addresses of the first and last dynamically initialized globals in
+ // this TU. Used in initialization order checking.
+ Value *FirstDynamic = 0, *LastDynamic = 0;
+
for (size_t i = 0; i < n; i++) {
GlobalVariable *G = GlobalsToChange[i];
PointerType *PtrTy = cast<PointerType>(G->getType());
@@ -549,6 +605,10 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
uint64_t RightRedzoneSize = RedzoneSize +
(RedzoneSize - (SizeInBytes % RedzoneSize));
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
+ // Determine whether this global should be poisoned in initialization.
+ bool GlobalHasDynamicInitializer = HasDynamicInitializer(G);
+ // Don't check initialization order if this global is blacklisted.
+ GlobalHasDynamicInitializer &= !BL->isInInit(*G);
StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
Constant *NewInitializer = ConstantStruct::get(
@@ -583,8 +643,17 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
ConstantInt::get(IntptrTy, SizeInBytes),
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
+ ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
NULL);
- DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal);
+
+ // Populate the first and last globals declared in this TU.
+ if (ClInitializers && GlobalHasDynamicInitializer) {
+ LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy);
+ if (FirstDynamic == 0)
+ FirstDynamic = LastDynamic;
+ }
+
+ DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
}
ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n);
@@ -592,8 +661,13 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage,
ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
+ // Create calls for poisoning before initializers run and unpoisoning after.
+ if (ClInitializers && FirstDynamic && LastDynamic)
+ createInitializerPoisonCalls(M, FirstDynamic, LastDynamic);
+
Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ kAsanRegisterGlobalsName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, NULL));
AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
IRB.CreateCall2(AsanRegisterGlobals,
@@ -623,12 +697,13 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
}
// virtual
-bool AddressSanitizer::runOnModule(Module &M) {
+bool AddressSanitizer::doInitialization(Module &M) {
// Initialize the private fields. No one has accessed them before.
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+
if (!TD)
return false;
- BL.reset(new FunctionBlackList(ClBlackListFile));
+ BL.reset(new BlackList(ClBlackListFile));
C = &(M.getContext());
LongSize = TD->getPointerSizeInBits();
@@ -656,17 +731,27 @@ bool AddressSanitizer::runOnModule(Module &M) {
std::string FunctionName = std::string(kAsanReportErrorTemplate) +
(AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex);
// If we are merging crash callbacks, they have two parameters.
- AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>(
- M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
+ AsanErrorCallback[AccessIsWrite][AccessSizeIndex] =
+ checkInterfaceFunction(M.getOrInsertFunction(
+ FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
}
}
+
+ AsanStackMallocFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL));
+ AsanStackFreeFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanStackFreeName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, IntptrTy, NULL));
+ AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
+
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
/*hasSideEffects=*/true);
llvm::Triple targetTriple(M.getTargetTriple());
- bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::ANDROIDEABI;
+ bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android;
MappingOffset = isAndroid ? kDefaultShadowOffsetAndroid :
(LongSize == 32 ? kDefaultShadowOffset32 : kDefaultShadowOffset64);
@@ -686,10 +771,6 @@ bool AddressSanitizer::runOnModule(Module &M) {
// For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
RedzoneSize = std::max(32, (int)(1 << MappingScale));
- bool Res = false;
-
- if (ClGlobals)
- Res |= insertGlobalRedzones(M);
if (ClMappingOffsetLog >= 0) {
// Tell the run-time the current values of mapping offset and scale.
@@ -709,17 +790,20 @@ bool AddressSanitizer::runOnModule(Module &M) {
IRB.CreateLoad(asan_mapping_scale, true);
}
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- Res |= handleFunction(M, *F);
- }
-
appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
- return Res;
+ return true;
+}
+
+bool AddressSanitizer::doFinalization(Module &M) {
+ // We transform the globals at the very end so that the optimization analysis
+ // works on the original globals.
+ if (ClGlobals)
+ return insertGlobalRedzones(M);
+ return false;
}
+
bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// For each NSObject descendant having a +load method, this method is invoked
// by the ObjC runtime before any of the static constructors is called.
@@ -736,19 +820,22 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
return false;
}
-bool AddressSanitizer::handleFunction(Module &M, Function &F) {
+bool AddressSanitizer::runOnFunction(Function &F) {
if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
+ DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
// If needed, insert __asan_init before checking for AddressSafety attr.
maybeInsertAsanInitAtFunctionEntry(F);
- if (!F.hasFnAttr(Attribute::AddressSafety)) return false;
+ if (!F.getFnAttributes().hasAttribute(Attributes::AddressSafety))
+ return false;
if (!ClDebugFunc.empty() && ClDebugFunc != F.getName())
return false;
- // We want to instrument every address only once per basic block
- // (unless there are calls between uses).
+
+ // We want to instrument every address only once per basic block (unless there
+ // are calls between uses).
SmallSet<Value*, 16> TempsToInstrument;
SmallVector<Instruction*, 16> ToInstrument;
SmallVector<Instruction*, 8> NoReturnCalls;
@@ -786,8 +873,6 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
}
}
- AsanFunctionContext AFC(F);
-
// Instrument.
int NumInstrumented = 0;
for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {
@@ -795,25 +880,23 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
if (ClDebugMin < 0 || ClDebugMax < 0 ||
(NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
if (isInterestingMemoryAccess(Inst, &IsWrite))
- instrumentMop(AFC, Inst);
+ instrumentMop(Inst);
else
- instrumentMemIntrinsic(AFC, cast<MemIntrinsic>(Inst));
+ instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
}
NumInstrumented++;
}
- DEBUG(dbgs() << F);
-
- bool ChangedStack = poisonStackInFunction(M, F);
+ bool ChangedStack = poisonStackInFunction(F);
// We must unpoison the stack before every NoReturn call (throw, _exit, etc).
// See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37
for (size_t i = 0, n = NoReturnCalls.size(); i != n; i++) {
Instruction *CI = NoReturnCalls[i];
IRBuilder<> IRB(CI);
- IRB.CreateCall(M.getOrInsertFunction(kAsanHandleNoReturnName,
- IRB.getVoidTy(), NULL));
+ IRB.CreateCall(AsanHandleNoReturnFunc);
}
+ DEBUG(dbgs() << "ASAN done instrumenting:\n" << F << "\n");
return NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
}
@@ -926,7 +1009,7 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
// compiler hoists the load of the shadow value somewhere too high.
// This causes asan to report a non-existing bug on 453.povray.
// It sounds like an LLVM bug.
-bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
+bool AddressSanitizer::poisonStackInFunction(Function &F) {
if (!ClStack) return false;
SmallVector<AllocaInst*, 16> AllocaVec;
SmallVector<Instruction*, 8> RetVec;
@@ -976,8 +1059,6 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
Value *LocalStackBase = OrigStackBase;
if (DoStackMalloc) {
- Value *AsanStackMallocFunc = M.getOrInsertFunction(
- kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL);
LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc,
ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
}
@@ -1012,22 +1093,16 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
Value *BasePlus1 = IRB.CreateAdd(LocalStackBase,
ConstantInt::get(IntptrTy, LongSize/8));
BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy);
- Value *Description = IRB.CreatePointerCast(
- createPrivateGlobalForString(M, StackDescription.str()),
- IntptrTy);
+ GlobalVariable *StackDescriptionGlobal =
+ createPrivateGlobalForString(*F.getParent(), StackDescription.str());
+ GlobalsCreatedByAsan.insert(StackDescriptionGlobal);
+ Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy);
IRB.CreateStore(Description, BasePlus1);
// Poison the stack redzones at the entry.
Value *ShadowBase = memToShadow(LocalStackBase, IRB);
PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRB, ShadowBase, true);
- Value *AsanStackFreeFunc = NULL;
- if (DoStackMalloc) {
- AsanStackFreeFunc = M.getOrInsertFunction(
- kAsanStackFreeName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, IntptrTy, NULL);
- }
-
// Unpoison the stack before all ret instructions.
for (size_t i = 0, n = RetVec.size(); i < n; i++) {
Instruction *Ret = RetVec[i];
@@ -1046,6 +1121,10 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
}
}
+ // We are done. Remove the old unused alloca instructions.
+ for (size_t i = 0, n = AllocaVec.size(); i < n; i++)
+ AllocaVec[i]->eraseFromParent();
+
if (ClDebugStack) {
DEBUG(dbgs() << F);
}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp
new file mode 100644
index 0000000..ef34b8a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp
@@ -0,0 +1,105 @@
+//===-- BlackList.cpp - blacklist for sanitizers --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions or global
+// variables based on a user-supplied blacklist.
+//
+//===----------------------------------------------------------------------===//
+
+#include <utility>
+#include <string>
+
+#include "BlackList.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+
+namespace llvm {
+
+BlackList::BlackList(const StringRef Path) {
+ // Validate and open blacklist file.
+ if (!Path.size()) return;
+ OwningPtr<MemoryBuffer> File;
+ if (error_code EC = MemoryBuffer::getFile(Path, File)) {
+ report_fatal_error("Can't open blacklist file: " + Path + ": " +
+ EC.message());
+ }
+
+ // Iterate through each line in the blacklist file.
+ SmallVector<StringRef, 16> Lines;
+ SplitString(File.take()->getBuffer(), Lines, "\n\r");
+ StringMap<std::string> Regexps;
+ for (SmallVector<StringRef, 16>::iterator I = Lines.begin(), E = Lines.end();
+ I != E; ++I) {
+ // Ignore empty lines and lines starting with "#"
+ if (I->empty() || I->startswith("#"))
+ continue;
+ // Get our prefix and unparsed regexp.
+ std::pair<StringRef, StringRef> SplitLine = I->split(":");
+ StringRef Prefix = SplitLine.first;
+ std::string Regexp = SplitLine.second;
+
+ // Replace * with .*
+ for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
+ pos += strlen(".*")) {
+ Regexp.replace(pos, strlen("*"), ".*");
+ }
+
+ // Check that the regexp is valid.
+ Regex CheckRE(Regexp);
+ std::string Error;
+ if (!CheckRE.isValid(Error)) {
+ report_fatal_error("malformed blacklist regex: " + SplitLine.second +
+ ": " + Error);
+ }
+
+ // Add this regexp into the proper group by its prefix.
+ if (Regexps[Prefix].size())
+ Regexps[Prefix] += "|";
+ Regexps[Prefix] += Regexp;
+ }
+
+ // Iterate through each of the prefixes, and create Regexs for them.
+ for (StringMap<std::string>::iterator I = Regexps.begin(), E = Regexps.end();
+ I != E; ++I) {
+ Entries[I->getKey()] = new Regex(I->getValue());
+ }
+}
+
+bool BlackList::isIn(const Function &F) {
+ return isIn(*F.getParent()) || inSection("fun", F.getName());
+}
+
+bool BlackList::isIn(const GlobalVariable &G) {
+ return isIn(*G.getParent()) || inSection("global", G.getName());
+}
+
+bool BlackList::isIn(const Module &M) {
+ return inSection("src", M.getModuleIdentifier());
+}
+
+bool BlackList::isInInit(const GlobalVariable &G) {
+ return isIn(*G.getParent()) || inSection("global-init", G.getName());
+}
+
+bool BlackList::inSection(const StringRef Section,
+ const StringRef Query) {
+ Regex *FunctionRegex = Entries[Section];
+ return FunctionRegex ? FunctionRegex->match(Query) : false;
+}
+
+} // namespace llvm
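To make the matching concrete (a hedged example using the entry format documented in BlackList.h below), a blacklist file containing

    fun:*_ZN4base6subtle*
    fun:MyFunc
    src:file_with_tricky_code.cc

is folded into Entries["fun"] = Regex(".*_ZN4base6subtle.*|MyFunc") and Entries["src"] = Regex("file_with_tricky_code.cc"); isIn(F) then matches the function name against the "fun" group and the module identifier against "src".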
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BlackList.h b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.h
new file mode 100644
index 0000000..f3c05a5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.h
@@ -0,0 +1,57 @@
+//===-- BlackList.h - blacklist for sanitizers ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions or global
+// variables based on a user-supplied blacklist.
+//
+// The blacklist disables instrumentation of various functions and global
+// variables. Each line contains a prefix, followed by a wild card expression.
+// Empty lines and lines starting with "#" are ignored.
+// ---
+// # Blacklisted items:
+// fun:*_ZN4base6subtle*
+// global:*global_with_bad_access_or_initialization*
+// global-init:*global_with_initialization_issues*
+// src:file_with_tricky_code.cc
+// ---
+// Note that the wild card is in fact an llvm::Regex, but * is automatically
+// replaced with .*
+// This is similar to the "ignore" feature of ThreadSanitizer.
+// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/ADT/StringMap.h"
+
+namespace llvm {
+class Function;
+class GlobalVariable;
+class Module;
+class Regex;
+class StringRef;
+
+class BlackList {
+ public:
+ BlackList(const StringRef Path);
+ // Returns whether either this function or its source file is blacklisted.
+ bool isIn(const Function &F);
+ // Returns whether either this global or its source file is blacklisted.
+ bool isIn(const GlobalVariable &G);
+ // Returns whether this module is blacklisted by filename.
+ bool isIn(const Module &M);
+ // Returns whether a global should be excluded from initialization checking.
+ bool isInInit(const GlobalVariable &G);
+ private:
+ StringMap<Regex*> Entries;
+
+ bool inSection(const StringRef Section, const StringRef Query);
+};
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 09e0f14..7810b1b 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -23,7 +23,8 @@
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/TargetFolder.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Instrumentation.h"
using namespace llvm;
@@ -47,11 +48,13 @@ namespace {
virtual bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetData>();
+ AU.addRequired<DataLayout>();
+ AU.addRequired<TargetLibraryInfo>();
}
private:
- const TargetData *TD;
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
ObjectSizeOffsetEvaluator *ObjSizeEval;
BuilderTy *Builder;
Instruction *Inst;
@@ -140,7 +143,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
Value *Offset = SizeOffset.second;
ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
- IntegerType *IntTy = TD->getIntPtrType(Inst->getContext());
+ Type *IntTy = TD->getIntPtrType(Ptr->getType());
Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
// three checks are required to ensure safety:
@@ -165,12 +168,13 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
}
bool BoundsChecking::runOnFunction(Function &F) {
- TD = &getAnalysis<TargetData>();
+ TD = &getAnalysis<DataLayout>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
TrapBB = 0;
BuilderTy TheBuilder(F.getContext(), TargetFolder(TD));
Builder = &TheBuilder;
- ObjectSizeOffsetEvaluator TheObjSizeEval(TD, F.getContext());
+ ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext());
ObjSizeEval = &TheObjSizeEval;
// check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/FunctionBlackList.cpp b/contrib/llvm/lib/Transforms/Instrumentation/FunctionBlackList.cpp
deleted file mode 100644
index 188ea4d..0000000
--- a/contrib/llvm/lib/Transforms/Instrumentation/FunctionBlackList.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-//===-- FunctionBlackList.cpp - blacklist of functions --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is a utility class for instrumentation passes (like AddressSanitizer
-// or ThreadSanitizer) to avoid instrumenting some functions based on
-// user-supplied blacklist.
-//
-//===----------------------------------------------------------------------===//
-
-#include "FunctionBlackList.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Function.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Regex.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-
-namespace llvm {
-
-FunctionBlackList::FunctionBlackList(const std::string &Path) {
- Functions = NULL;
- const char *kFunPrefix = "fun:";
- if (!Path.size()) return;
- std::string Fun;
-
- OwningPtr<MemoryBuffer> File;
- if (error_code EC = MemoryBuffer::getFile(Path.c_str(), File)) {
- report_fatal_error("Can't open blacklist file " + Path + ": " +
- EC.message());
- }
- MemoryBuffer *Buff = File.take();
- const char *Data = Buff->getBufferStart();
- size_t DataLen = Buff->getBufferSize();
- SmallVector<StringRef, 16> Lines;
- SplitString(StringRef(Data, DataLen), Lines, "\n\r");
- for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) {
- if (Lines[i].startswith(kFunPrefix)) {
- std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix));
- std::string ThisFuncRE;
- // add ThisFunc replacing * with .*
- for (size_t j = 0, n = ThisFunc.size(); j < n; j++) {
- if (ThisFunc[j] == '*')
- ThisFuncRE += '.';
- ThisFuncRE += ThisFunc[j];
- }
- // Check that the regexp is valid.
- Regex CheckRE(ThisFuncRE);
- std::string Error;
- if (!CheckRE.isValid(Error))
- report_fatal_error("malformed blacklist regex: " + ThisFunc +
- ": " + Error);
- // Append to the final regexp.
- if (Fun.size())
- Fun += "|";
- Fun += ThisFuncRE;
- }
- }
- if (Fun.size()) {
- Functions = new Regex(Fun);
- }
-}
-
-bool FunctionBlackList::isIn(const Function &F) {
- if (Functions) {
- bool Res = Functions->match(F.getName());
- return Res;
- }
- return false;
-}
-
-} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/FunctionBlackList.h b/contrib/llvm/lib/Transforms/Instrumentation/FunctionBlackList.h
deleted file mode 100644
index c1239b9..0000000
--- a/contrib/llvm/lib/Transforms/Instrumentation/FunctionBlackList.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- FunctionBlackList.cpp - blacklist of functions ----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//===----------------------------------------------------------------------===//
-//
-// This is a utility class for instrumentation passes (like AddressSanitizer
-// or ThreadSanitizer) to avoid instrumenting some functions based on
-// user-supplied blacklist.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include <string>
-
-namespace llvm {
-class Function;
-class Regex;
-
-// Blacklisted functions are not instrumented.
-// The blacklist file contains one or more lines like this:
-// ---
-// fun:FunctionWildCard
-// ---
-// This is similar to the "ignore" feature of ThreadSanitizer.
-// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
-class FunctionBlackList {
- public:
- FunctionBlackList(const std::string &Path);
- bool isIn(const Function &F);
- private:
- Regex *Functions;
-};
-
-} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 264a6a6..e9192e5 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -88,11 +88,11 @@ namespace {
// Add the function to write out all our counters to the global destructor
// list.
- void insertCounterWriteout(SmallVector<std::pair<GlobalVariable *,
- MDNode *>, 8> &);
+ void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
void insertIndirectCounterIncrement();
+ void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
- std::string mangleName(DICompileUnit CU, std::string NewStem);
+ std::string mangleName(DICompileUnit CU, const char *NewStem);
bool EmitNotes;
bool EmitData;
@@ -329,7 +329,7 @@ namespace {
};
}
-std::string GCOVProfiler::mangleName(DICompileUnit CU, std::string NewStem) {
+std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {
if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
MDNode *N = GCov->getOperand(i);
@@ -519,6 +519,7 @@ bool GCOVProfiler::emitProfileArcs() {
}
insertCounterWriteout(CountersBySP);
+ insertFlush(CountersBySP);
}
if (InsertIndCounterIncrCode)
@@ -630,14 +631,15 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() {
}
void GCOVProfiler::insertCounterWriteout(
- SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) {
- FunctionType *WriteoutFTy =
- FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *WriteoutF = Function::Create(WriteoutFTy,
- GlobalValue::InternalLinkage,
- "__llvm_gcov_writeout", M);
+ ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
+ FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
+ if (!WriteoutF)
+ WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage,
+ "__llvm_gcov_writeout", M);
WriteoutF->setUnnamedAddr(true);
- BasicBlock *BB = BasicBlock::Create(*Ctx, "", WriteoutF);
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
IRBuilder<> Builder(BB);
Constant *StartFile = getStartFileFunc();
@@ -648,11 +650,11 @@ void GCOVProfiler::insertCounterWriteout(
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (CU_Nodes) {
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- DICompileUnit compile_unit(CU_Nodes->getOperand(i));
- std::string FilenameGcda = mangleName(compile_unit, "gcda");
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ std::string FilenameGcda = mangleName(CU, "gcda");
Builder.CreateCall(StartFile,
Builder.CreateGlobalStringPtr(FilenameGcda));
- for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator
+ for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator
I = CountersBySP.begin(), E = CountersBySP.end();
I != E; ++I) {
DISubprogram SP(I->second);
@@ -680,7 +682,7 @@ void GCOVProfiler::insertCounterWriteout(
"__llvm_gcov_init", M);
F->setUnnamedAddr(true);
F->setLinkage(GlobalValue::InternalLinkage);
- F->addFnAttr(Attribute::NoInline);
+ F->addFnAttr(Attributes::NoInline);
BB = BasicBlock::Create(*Ctx, "entry", F);
Builder.SetInsertPoint(BB);
@@ -699,7 +701,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
cast<Function>(GCOVProfiler::getIncrementIndirectCounterFunc());
Fn->setUnnamedAddr(true);
Fn->setLinkage(GlobalValue::InternalLinkage);
- Fn->addFnAttr(Attribute::NoInline);
+ Fn->addFnAttr(Attributes::NoInline);
Type *Int32Ty = Type::getInt32Ty(*Ctx);
Type *Int64Ty = Type::getInt64Ty(*Ctx);
@@ -745,3 +747,42 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Builder.SetInsertPoint(Exit);
Builder.CreateRetVoid();
}
+
+void GCOVProfiler::
+insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *FlushF = M->getFunction("__gcov_flush");
+ if (!FlushF)
+ FlushF = Function::Create(FTy, GlobalValue::InternalLinkage,
+ "__gcov_flush", M);
+ else
+ FlushF->setLinkage(GlobalValue::InternalLinkage);
+ FlushF->setUnnamedAddr(true);
+
+ BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
+
+ // Write out the current counters.
+ Constant *WriteoutF = M->getFunction("__llvm_gcov_writeout");
+ assert(WriteoutF && "Need to create the writeout function first!");
+
+ IRBuilder<> Builder(Entry);
+ Builder.CreateCall(WriteoutF);
+
+ // Zero out the counters.
+ for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator
+ I = CountersBySP.begin(), E = CountersBySP.end();
+ I != E; ++I) {
+ GlobalVariable *GV = I->first;
+ Constant *Null = Constant::getNullValue(GV->getType()->getElementType());
+ Builder.CreateStore(Null, GV);
+ }
+
+ Type *RetTy = FlushF->getReturnType();
+ if (RetTy == Type::getVoidTy(*Ctx))
+ Builder.CreateRetVoid();
+ else if (RetTy->isIntegerTy())
+ // Used if __gcov_flush was implicitly declared.
+ Builder.CreateRet(ConstantInt::get(RetTy, 0));
+ else
+ report_fatal_error("invalid return type for __gcov_flush");
+}
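The new __gcov_flush hook can be driven from user code. A minimal usage sketch, assuming the program is compiled with -fprofile-arcs so the symbol emitted above (or libgcov's own __gcov_flush) is available at link time:

extern "C" void __gcov_flush(void);

void checkpoint_coverage() {
  // Writes the current counters to the .gcda files and zeroes them, matching the
  // writeout-then-reset sequence generated by insertFlush(); useful before exec()/_exit().
  __gcov_flush();
}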
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index f76c77e..a4bb5a6 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -26,30 +26,6 @@ namespace llvm {
/// The type parameter T determines the type of the nodes of the graph.
template <typename T>
class MaximumSpanningTree {
-
- // A comparing class for comparing weighted edges.
- template <typename CT>
- struct EdgeWeightCompare {
- bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X,
- typename MaximumSpanningTree<CT>::EdgeWeight Y) const {
- if (X.second > Y.second) return true;
- if (X.second < Y.second) return false;
- if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) {
- if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) {
- if (BBX->size() > BBY->size()) return true;
- if (BBX->size() < BBY->size()) return false;
- }
- }
- if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) {
- if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) {
- if (BBX->size() > BBY->size()) return true;
- if (BBX->size() < BBY->size()) return false;
- }
- }
- return false;
- }
- };
-
public:
typedef std::pair<const T*, const T*> Edge;
typedef std::pair<Edge, double> EdgeWeight;
@@ -59,6 +35,33 @@ namespace llvm {
MaxSpanTree MST;
+ private:
+ // A comparing class for comparing weighted edges.
+ struct EdgeWeightCompare {
+      static size_t getBlockSize(const T *X) {
+ const BasicBlock *BB = dyn_cast_or_null<BasicBlock>(X);
+ return BB ? BB->size() : 0;
+ }
+
+ bool operator()(EdgeWeight X, EdgeWeight Y) const {
+ if (X.second > Y.second) return true;
+ if (X.second < Y.second) return false;
+
+ // Equal edge weights: break ties by comparing block sizes.
+ size_t XSizeA = getBlockSize(X.first.first);
+ size_t YSizeA = getBlockSize(Y.first.first);
+ if (XSizeA > YSizeA) return true;
+ if (XSizeA < YSizeA) return false;
+
+ size_t XSizeB = getBlockSize(X.first.second);
+ size_t YSizeB = getBlockSize(Y.first.second);
+ if (XSizeB > YSizeB) return true;
+ if (XSizeB < YSizeB) return false;
+
+ return false;
+ }
+ };
+
public:
static char ID; // Class identification, replacement for typeinfo
@@ -66,7 +69,7 @@ namespace llvm {
/// spanning tree.
MaximumSpanningTree(EdgeWeights &EdgeVector) {
- std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>());
+ std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare());
// Create spanning tree, Forest contains a special data structure
// that makes checking if two nodes are already in a common (sub-)tree
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index dc0fa71..9e10fc4 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -21,7 +21,7 @@
#define DEBUG_TYPE "tsan"
-#include "FunctionBlackList.h"
+#include "BlackList.h"
#include "llvm/Function.h"
#include "llvm/IRBuilder.h"
#include "llvm/Intrinsics.h"
@@ -38,7 +38,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -47,10 +47,19 @@ using namespace llvm;
static cl::opt<std::string> ClBlackListFile("tsan-blacklist",
cl::desc("Blacklist file"), cl::Hidden);
+static cl::opt<bool> ClInstrumentMemoryAccesses(
+ "tsan-instrument-memory-accesses", cl::init(true),
+ cl::desc("Instrument memory accesses"), cl::Hidden);
+static cl::opt<bool> ClInstrumentFuncEntryExit(
+ "tsan-instrument-func-entry-exit", cl::init(true),
+ cl::desc("Instrument function entry and exit"), cl::Hidden);
+static cl::opt<bool> ClInstrumentAtomics(
+ "tsan-instrument-atomics", cl::init(true),
+ cl::desc("Instrument atomics"), cl::Hidden);
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
-STATISTIC(NumOmittedReadsBeforeWrite,
+STATISTIC(NumOmittedReadsBeforeWrite,
"Number of reads ignored due to following writes");
STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size");
STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes");
@@ -76,8 +85,8 @@ struct ThreadSanitizer : public FunctionPass {
bool addrPointsToConstantData(Value *Addr);
int getMemoryAccessFuncIndex(Value *Addr);
- TargetData *TD;
- OwningPtr<FunctionBlackList> BL;
+ DataLayout *TD;
+ OwningPtr<BlackList> BL;
IntegerType *OrdTy;
// Callbacks to run-time library are computed in doInitialization.
Function *TsanFuncEntry;
@@ -88,6 +97,10 @@ struct ThreadSanitizer : public FunctionPass {
Function *TsanWrite[kNumberOfAccessSizes];
Function *TsanAtomicLoad[kNumberOfAccessSizes];
Function *TsanAtomicStore[kNumberOfAccessSizes];
+ Function *TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1][kNumberOfAccessSizes];
+ Function *TsanAtomicCAS[kNumberOfAccessSizes];
+ Function *TsanAtomicThreadFence;
+ Function *TsanAtomicSignalFence;
Function *TsanVptrUpdate;
};
} // namespace
@@ -118,10 +131,10 @@ static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
}
bool ThreadSanitizer::doInitialization(Module &M) {
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
if (!TD)
return false;
- BL.reset(new FunctionBlackList(ClBlackListFile));
+ BL.reset(new BlackList(ClBlackListFile));
// Always insert a call to __tsan_init into the module's CTORs.
IRBuilder<> IRB(M.getContext());
@@ -158,10 +171,42 @@ bool ThreadSanitizer::doInitialization(Module &M) {
TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction(
AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy,
NULL));
+
+ for (int op = AtomicRMWInst::FIRST_BINOP;
+ op <= AtomicRMWInst::LAST_BINOP; ++op) {
+ TsanAtomicRMW[op][i] = NULL;
+ const char *NamePart = NULL;
+ if (op == AtomicRMWInst::Xchg)
+ NamePart = "_exchange";
+ else if (op == AtomicRMWInst::Add)
+ NamePart = "_fetch_add";
+ else if (op == AtomicRMWInst::Sub)
+ NamePart = "_fetch_sub";
+ else if (op == AtomicRMWInst::And)
+ NamePart = "_fetch_and";
+ else if (op == AtomicRMWInst::Or)
+ NamePart = "_fetch_or";
+ else if (op == AtomicRMWInst::Xor)
+ NamePart = "_fetch_xor";
+ else
+ continue;
+ SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
+ TsanAtomicRMW[op][i] = checkInterfaceFunction(M.getOrInsertFunction(
+ RMWName, Ty, PtrTy, Ty, OrdTy, NULL));
+ }
+
+ SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) +
+ "_compare_exchange_val");
+ TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, NULL));
}
TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction(
"__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), NULL));
+ TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction(
+ "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL));
+ TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction(
+ "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL));
return true;
}
@@ -186,7 +231,7 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
NumOmittedReadsFromConstantGlobals++;
return true;
}
- } else if(LoadInst *L = dyn_cast<LoadInst>(Addr)) {
+ } else if (LoadInst *L = dyn_cast<LoadInst>(Addr)) {
if (isVtableAccess(L)) {
// Reads from a vtable pointer can not race with any writes.
NumOmittedReadsFromVtable++;
@@ -244,8 +289,8 @@ static bool isAtomic(Instruction *I) {
return true;
if (isa<AtomicCmpXchgInst>(I))
return true;
- if (FenceInst *FI = dyn_cast<FenceInst>(I))
- return FI->getSynchScope() == CrossThread;
+ if (isa<FenceInst>(I))
+ return true;
return false;
}
@@ -284,17 +329,19 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
// (e.g. variables that do not escape, etc).
// Instrument memory accesses.
- for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) {
- Res |= instrumentLoadOrStore(AllLoadsAndStores[i]);
- }
+ if (ClInstrumentMemoryAccesses)
+ for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) {
+ Res |= instrumentLoadOrStore(AllLoadsAndStores[i]);
+ }
// Instrument atomic memory accesses.
- for (size_t i = 0, n = AtomicAccesses.size(); i < n; ++i) {
- Res |= instrumentAtomic(AtomicAccesses[i]);
- }
+ if (ClInstrumentAtomics)
+ for (size_t i = 0, n = AtomicAccesses.size(); i < n; ++i) {
+ Res |= instrumentAtomic(AtomicAccesses[i]);
+ }
// Instrument function entry/exit points if there were instrumented accesses.
- if (Res || HasCalls) {
+ if ((Res || HasCalls) && ClInstrumentFuncEntryExit) {
IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
Value *ReturnAddress = IRB.CreateCall(
Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress),
@@ -343,12 +390,12 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
switch (ord) {
case NotAtomic: assert(false);
case Unordered: // Fall-through.
- case Monotonic: v = 1 << 0; break;
- // case Consume: v = 1 << 1; break; // Not specified yet.
- case Acquire: v = 1 << 2; break;
- case Release: v = 1 << 3; break;
- case AcquireRelease: v = 1 << 4; break;
- case SequentiallyConsistent: v = 1 << 5; break;
+ case Monotonic: v = 0; break;
+ // case Consume: v = 1; break; // Not specified yet.
+ case Acquire: v = 2; break;
+ case Release: v = 3; break;
+ case AcquireRelease: v = 4; break;
+ case SequentiallyConsistent: v = 5; break;
}
return IRB->getInt32(v);
}
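The new constants appear to track the C++11 memory_order enumeration (and thus the __tsan_memory_order values the runtime expects) rather than the old one-hot encoding:

//   Monotonic -> 0 (relaxed), Consume -> 1, Acquire -> 2, Release -> 3,
//   AcquireRelease -> 4, SequentiallyConsistent -> 5 (seq_cst)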
@@ -385,12 +432,44 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
CallInst *C = CallInst::Create(TsanAtomicStore[Idx],
ArrayRef<Value*>(Args));
ReplaceInstWithInst(I, C);
- } else if (isa<AtomicRMWInst>(I)) {
- // FIXME: Not yet supported.
- } else if (isa<AtomicCmpXchgInst>(I)) {
- // FIXME: Not yet supported.
- } else if (isa<FenceInst>(I)) {
- // FIXME: Not yet supported.
+ } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) {
+ Value *Addr = RMWI->getPointerOperand();
+ int Idx = getMemoryAccessFuncIndex(Addr);
+ if (Idx < 0)
+ return false;
+ Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
+ if (F == NULL)
+ return false;
+ const size_t ByteSize = 1 << Idx;
+ const size_t BitSize = ByteSize * 8;
+ Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+ Type *PtrTy = Ty->getPointerTo();
+ Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+ IRB.CreateIntCast(RMWI->getValOperand(), Ty, false),
+ createOrdering(&IRB, RMWI->getOrdering())};
+ CallInst *C = CallInst::Create(F, ArrayRef<Value*>(Args));
+ ReplaceInstWithInst(I, C);
+ } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
+ Value *Addr = CASI->getPointerOperand();
+ int Idx = getMemoryAccessFuncIndex(Addr);
+ if (Idx < 0)
+ return false;
+ const size_t ByteSize = 1 << Idx;
+ const size_t BitSize = ByteSize * 8;
+ Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+ Type *PtrTy = Ty->getPointerTo();
+ Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+ IRB.CreateIntCast(CASI->getCompareOperand(), Ty, false),
+ IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false),
+ createOrdering(&IRB, CASI->getOrdering())};
+ CallInst *C = CallInst::Create(TsanAtomicCAS[Idx], ArrayRef<Value*>(Args));
+ ReplaceInstWithInst(I, C);
+ } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) {
+ Value *Args[] = {createOrdering(&IRB, FI->getOrdering())};
+ Function *F = FI->getSynchScope() == SingleThread ?
+ TsanAtomicSignalFence : TsanAtomicThreadFence;
+ CallInst *C = CallInst::Create(F, ArrayRef<Value*>(Args));
+ ReplaceInstWithInst(I, C);
}
return true;
}
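A hand-written before/after illustration of the atomicrmw rewrite (value names and the 32-bit width are invented for the example):

//   before:  %old = atomicrmw add i32* %p, i32 %v seq_cst
//   after:   %old = call i32 @__tsan_atomic32_fetch_add(i32* %p, i32 %v, i32 5)
//   (the trailing i32 5 is the seq_cst ordering constant produced by createOrdering)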
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
index a3c426a..123ed0f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -27,6 +27,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Assembly/Writer.h"
@@ -37,12 +38,13 @@
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -146,9 +148,18 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
- OptSize = F.hasFnAttr(Attribute::OptimizeForSize);
+ OptSize = F.getFnAttributes().hasAttribute(Attributes::OptimizeForSize);
+
+ /// This optimization identifies DIV instructions that can be
+ /// profitably bypassed and carried out with a shorter, faster divide.
+ if (TLI && TLI->isSlowDivBypassed()) {
+ const DenseMap<unsigned int, unsigned int> &BypassWidths =
+ TLI->getBypassSlowDivWidths();
+ for (Function::iterator I = F.begin(); I != F.end(); I++)
+ EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
+ }
- // First pass, eliminate blocks that contain only PHI nodes and an
+ // Eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
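A conceptual sketch of the slow-division bypass added above, written as plain C++ rather than the IR the pass actually emits, for a target reporting a 64-to-32-bit bypass width:

#include <cstdint>

uint64_t div64(uint64_t a, uint64_t b) {
  if (((a | b) >> 32) == 0)                      // both operands fit in 32 bits
    return uint64_t(uint32_t(a) / uint32_t(b));  // cheap narrow divide
  return a / b;                                  // slow full-width divide
}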
@@ -160,7 +171,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = I++;
MadeChange |= OptimizeBlock(*BB);
}
@@ -215,11 +226,13 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
// edge, just collapse it.
BasicBlock *SinglePred = BB->getSinglePredecessor();
- if (!SinglePred || SinglePred == BB) continue;
+ // Don't merge if BB's address is taken.
+ if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
if (Term && !Term->isConditional()) {
Changed = true;
+ DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
// Remember if SinglePred was the entry block of the function.
// If so, we will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
@@ -230,7 +243,6 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
// We have erased a block. Update the iterator.
I = BB;
- DEBUG(dbgs() << "Merged:\n"<< *SinglePred << "\n\n\n");
}
}
return Changed;
@@ -610,7 +622,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// happens.
WeakVH IterHandle(CurInstIterator);
- replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getTargetData() : 0,
+ replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0,
TLInfo, ModifiedDT ? 0 : DT);
// If the iterator instruction was recursively deleted, start over at the
@@ -634,8 +646,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// From here on out we're working with named functions.
if (CI->getCalledFunction() == 0) return false;
- // We'll need TargetData from here on out.
- const TargetData *TD = TLI ? TLI->getTargetData() : 0;
+ // We'll need DataLayout from here on out.
+ const DataLayout *TD = TLI ? TLI->getDataLayout() : 0;
if (!TD) return false;
// Lower all default uses of _chk calls. This is very similar
@@ -649,6 +661,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
/// instructions to the predecessor to enable tail call optimizations. The
/// case it is currently looking for is:
+/// @code
/// bb0:
/// %tmp0 = tail call i32 @f0()
/// br label %return
@@ -661,9 +674,11 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
/// return:
/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
/// ret i32 %retval
+/// @endcode
///
/// =>
///
+/// @code
/// bb0:
/// %tmp0 = tail call i32 @f0()
/// ret i32 %tmp0
@@ -673,7 +688,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
/// bb2:
/// %tmp2 = tail call i32 @f2()
/// ret i32 %tmp2
-///
+/// @endcode
bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
if (!TLI)
return false;
@@ -699,7 +714,8 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
// See llvm::isInTailCallPosition().
const Function *F = BB->getParent();
Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
- if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ if (CallerRetAttr.hasAttribute(Attributes::ZExt) ||
+ CallerRetAttr.hasAttribute(Attributes::SExt))
return false;
// Make sure there are no instructions between the PHI and return, or that the
@@ -757,7 +773,10 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
// Conservatively require the attributes of the call to match those of the
// return. Ignore noalias because it doesn't affect the call sequence.
Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes();
- if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+ if (AttrBuilder(CalleeRetAttr).
+ removeAttribute(Attributes::NoAlias) !=
+ AttrBuilder(CallerRetAttr).
+ removeAttribute(Attributes::NoAlias))
continue;
// Make sure the call instruction is followed by an unconditional branch to
@@ -774,7 +793,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
}
// If we eliminated all predecessors of the block, delete the block now.
- if (Changed && pred_begin(BB) == pred_end(BB))
+ if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
BB->eraseFromParent();
return Changed;
@@ -914,7 +933,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
Type *IntPtrTy =
- TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
+ TLI->getDataLayout()->getIntPtrType(AccessTy->getContext());
Value *Result = 0;
@@ -988,7 +1007,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
WeakVH IterHandle(CurInstIterator);
BasicBlock *BB = CurInstIterator->getParent();
- RecursivelyDeleteTriviallyDeadInstructions(Repl);
+ RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
if (IterHandle != CurInstIterator) {
// If the iterator instruction was recursively deleted, start over at the
@@ -1174,17 +1193,32 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
}
+/// If we have a SelectInst that will likely profit from branch prediction,
+/// turn it into a branch.
bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
- // If we have a SelectInst that will likely profit from branch prediction,
- // turn it into a branch.
- if (DisableSelectToBranch || OptSize || !TLI ||
- !TLI->isPredictableSelectExpensive())
- return false;
+ bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
- if (!SI->getCondition()->getType()->isIntegerTy(1) ||
- !isFormingBranchFromSelectProfitable(SI))
+  // Can we convert the 'select' into explicit control flow?
+ if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
return false;
+ TargetLowering::SelectSupportKind SelectKind;
+ if (VectorCond)
+ SelectKind = TargetLowering::VectorMaskSelect;
+ else if (SI->getType()->isVectorTy())
+ SelectKind = TargetLowering::ScalarCondVectorVal;
+ else
+ SelectKind = TargetLowering::ScalarValSelect;
+
+  // Do we have efficient codegen support for this kind of select?
+ if (TLI->isSelectSupported(SelectKind)) {
+ // We have efficient codegen support for the select instruction.
+ // Check if it is profitable to keep this 'select'.
+ if (!TLI->isPredictableSelectExpensive() ||
+ !isFormingBranchFromSelectProfitable(SI))
+ return false;
+ }
+
ModifiedDT = true;
// First, we split the block containing the select into 2 blocks.
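Source-level intuition only (the pass itself rewrites IR): when a predictable select is expensive on the target, something like

//   int r = flag ? a : b;                // select over already-computed values
// is restructured into
//   int r; if (flag) r = a; else r = b;  // explicit branch, resolved by the predictor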
@@ -1302,7 +1336,7 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
bool MadeChange = false;
CurInstIterator = BB.begin();
- for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
+ while (CurInstIterator != BB.end())
MadeChange |= OptimizeInst(CurInstIterator++);
return MadeChange;
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
index 5430f62..369720b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -24,7 +24,7 @@
#include "llvm/Constant.h"
#include "llvm/Instruction.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -67,7 +67,7 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.insert(&*i);
}
bool Changed = false;
- TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
while (!WorkList.empty()) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 9b0aadb..3ec6f3d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -235,6 +235,11 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
// This case never fires - remove it.
CI.getCaseSuccessor()->removePredecessor(BB);
SI->removeCase(CI); // Does not invalidate the iterator.
+
+ // The condition can be modified by removePredecessor's PHI simplification
+ // logic.
+ Cond = SI->getCondition();
+
++NumDeadCases;
Changed = true;
} else if (State == LazyValueInfo::True) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index 8dbcc23..a2e074f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -22,6 +22,7 @@
#include "llvm/Instruction.h"
#include "llvm/Pass.h"
#include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -38,10 +39,11 @@ namespace {
initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
}
virtual bool runOnBasicBlock(BasicBlock &BB) {
+ TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
Instruction *Inst = DI++;
- if (isInstructionTriviallyDead(Inst)) {
+ if (isInstructionTriviallyDead(Inst, TLI)) {
Inst->eraseFromParent();
Changed = true;
++DIEEliminated;
@@ -87,6 +89,8 @@ char DCE::ID = 0;
INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
bool DCE::runOnFunction(Function &F) {
+ TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+
// Start out with all of the instructions in the worklist...
std::vector<Instruction*> WorkList;
for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
@@ -101,7 +105,7 @@ bool DCE::runOnFunction(Function &F) {
Instruction *I = WorkList.back();
WorkList.pop_back();
- if (isInstructionTriviallyDead(I)) { // If the instruction is dead.
+ if (isInstructionTriviallyDead(I, TLI)) { // If the instruction is dead.
// Loop over all of the values that the instruction uses, if there are
// instructions being used, add them to the worklist, because they might
// go dead after this one is removed.
@@ -114,13 +118,8 @@ bool DCE::runOnFunction(Function &F) {
I->eraseFromParent();
// Remove the instruction from the worklist if it still exists in it.
- for (std::vector<Instruction*>::iterator WI = WorkList.begin();
- WI != WorkList.end(); ) {
- if (*WI == I)
- WI = WorkList.erase(WI);
- else
- ++WI;
- }
+ WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), I),
+ WorkList.end());
MadeChange = true;
++DCEEliminated;
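The replacement above is the standard erase-remove idiom; generically:

#include <algorithm>
#include <vector>

template <typename T>
void eraseAll(std::vector<T> &V, const T &X) {
  // Removes every occurrence of X in one pass instead of repeated erase() calls.
  V.erase(std::remove(V.begin(), V.end(), X), V.end());
}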
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 8b1283f..736cc05 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -29,7 +29,8 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/SetVector.h"
@@ -45,6 +46,7 @@ namespace {
AliasAnalysis *AA;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
+ const TargetLibraryInfo *TLI;
static char ID; // Pass identification, replacement for typeid
DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) {
@@ -55,6 +57,7 @@ namespace {
AA = &getAnalysis<AliasAnalysis>();
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
+ TLI = AA->getTargetLibraryInfo();
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
@@ -106,6 +109,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
///
static void DeleteDeadInstruction(Instruction *I,
MemoryDependenceAnalysis &MD,
+ const TargetLibraryInfo *TLI,
SmallSetVector<Value*, 16> *ValueSet = 0) {
SmallVector<Instruction*, 32> NowDeadInsts;
@@ -130,7 +134,7 @@ static void DeleteDeadInstruction(Instruction *I,
if (!Op->use_empty()) continue;
if (Instruction *OpI = dyn_cast<Instruction>(Op))
- if (isInstructionTriviallyDead(OpI))
+ if (isInstructionTriviallyDead(OpI, TLI))
NowDeadInsts.push_back(OpI);
}
@@ -143,7 +147,7 @@ static void DeleteDeadInstruction(Instruction *I,
/// hasMemoryWrite - Does this instruction write some memory? This only returns
/// true for things that we can analyze with other helpers below.
-static bool hasMemoryWrite(Instruction *I) {
+static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
if (isa<StoreInst>(I))
return true;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
@@ -158,6 +162,26 @@ static bool hasMemoryWrite(Instruction *I) {
return true;
}
}
+ if (CallSite CS = I) {
+ if (Function *F = CS.getCalledFunction()) {
+ if (TLI && TLI->has(LibFunc::strcpy) &&
+ F->getName() == TLI->getName(LibFunc::strcpy)) {
+ return true;
+ }
+ if (TLI && TLI->has(LibFunc::strncpy) &&
+ F->getName() == TLI->getName(LibFunc::strncpy)) {
+ return true;
+ }
+ if (TLI && TLI->has(LibFunc::strcat) &&
+ F->getName() == TLI->getName(LibFunc::strcat)) {
+ return true;
+ }
+ if (TLI && TLI->has(LibFunc::strncat) &&
+ F->getName() == TLI->getName(LibFunc::strncat)) {
+ return true;
+ }
+ }
+ }
return false;
}
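Illustrative effect of the new libcall handling (made-up example): a string copy into a stack buffer that never escapes and is never read again can now be treated like any other dead store:

#include <string.h>

void copy_and_drop(const char *s) {
  char buf[32];
  strncpy(buf, s, sizeof(buf) - 1);  // result unused, buf never escapes -> removable
}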
@@ -175,7 +199,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// If we don't have target data around, an unknown size in Location means
// that we should use the size of the pointee type. This isn't valid for
// memset/memcpy, which writes more than an i8.
- if (Loc.Size == AliasAnalysis::UnknownSize && AA.getTargetData() == 0)
+ if (Loc.Size == AliasAnalysis::UnknownSize && AA.getDataLayout() == 0)
return AliasAnalysis::Location();
return Loc;
}
@@ -189,7 +213,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// If we don't have target data around, an unknown size in Location means
// that we should use the size of the pointee type. This isn't valid for
// init.trampoline, which writes more than an i8.
- if (AA.getTargetData() == 0) return AliasAnalysis::Location();
+ if (AA.getDataLayout() == 0) return AliasAnalysis::Location();
// FIXME: We don't know the size of the trampoline, so we can't really
// handle it here.
@@ -205,7 +229,8 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
/// instruction if any.
static AliasAnalysis::Location
getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
- assert(hasMemoryWrite(Inst) && "Unknown instruction case");
+ assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) &&
+ "Unknown instruction case");
// The only instructions that both read and write are the mem transfer
// instructions (memcpy/memmove).
@@ -222,23 +247,29 @@ static bool isRemovable(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->isUnordered();
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
- case Intrinsic::lifetime_end:
- // Never remove dead lifetime_end's, e.g. because it is followed by a
- // free.
- return false;
- case Intrinsic::init_trampoline:
- // Always safe to remove init_trampoline.
- return true;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
+ case Intrinsic::lifetime_end:
+ // Never remove dead lifetime_end's, e.g. because it is followed by a
+ // free.
+ return false;
+ case Intrinsic::init_trampoline:
+ // Always safe to remove init_trampoline.
+ return true;
- case Intrinsic::memset:
- case Intrinsic::memmove:
- case Intrinsic::memcpy:
- // Don't remove volatile memory intrinsics.
- return !cast<MemIntrinsic>(II)->isVolatile();
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ // Don't remove volatile memory intrinsics.
+ return !cast<MemIntrinsic>(II)->isVolatile();
+ }
}
+
+ if (CallSite CS = I)
+ return CS.getInstruction()->use_empty();
+
+ return false;
}
@@ -249,14 +280,19 @@ static bool isShortenable(Instruction *I) {
if (isa<StoreInst>(I))
return false;
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default: return false;
- case Intrinsic::memset:
- case Intrinsic::memcpy:
- // Do shorten memory intrinsics.
- return true;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ // Do shorten memory intrinsics.
+ return true;
+ }
}
+
+  // Don't shorten library calls for now.
+
+ return false;
}
/// getStoredPointerOperand - Return the pointer that is being written to.
@@ -266,17 +302,23 @@ static Value *getStoredPointerOperand(Instruction *I) {
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
return MI->getDest();
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::init_trampoline:
- return II->getArgOperand(0);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return II->getArgOperand(0);
+ }
}
+
+ CallSite CS = I;
+ // All the supported functions so far happen to have dest as their first
+ // argument.
+ return CS.getArgument(0);
}
static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
uint64_t Size;
- if (getObjectSize(V, Size, AA.getTargetData()))
+ if (getObjectSize(V, Size, AA.getDataLayout(), AA.getTargetLibraryInfo()))
return Size;
return AliasAnalysis::UnknownSize;
}
@@ -309,10 +351,10 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// comparison.
if (Later.Size == AliasAnalysis::UnknownSize ||
Earlier.Size == AliasAnalysis::UnknownSize) {
- // If we have no TargetData information around, then the size of the store
+ // If we have no DataLayout information around, then the size of the store
// is inferrable from the pointee type. If they are the same type, then
// we know that the store is safe.
- if (AA.getTargetData() == 0 &&
+ if (AA.getDataLayout() == 0 &&
Later.Ptr->getType() == Earlier.Ptr->getType())
return OverwriteComplete;
@@ -328,13 +370,13 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
Earlier.Size == AliasAnalysis::UnknownSize ||
- AA.getTargetData() == 0)
+ AA.getDataLayout() == 0)
return OverwriteUnknown;
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval argument). If so, then it clearly overwrites any
// other store to the same object.
- const TargetData &TD = *AA.getTargetData();
+ const DataLayout &TD = *AA.getDataLayout();
const Value *UO1 = GetUnderlyingObject(P1, &TD),
*UO2 = GetUnderlyingObject(P2, &TD);
@@ -454,13 +496,13 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
Instruction *Inst = BBI++;
// Handle 'free' calls specially.
- if (CallInst *F = isFreeCall(Inst)) {
+ if (CallInst *F = isFreeCall(Inst, TLI)) {
MadeChange |= HandleFree(F);
continue;
}
// If we find something that writes memory, get its memory dependence.
- if (!hasMemoryWrite(Inst))
+ if (!hasMemoryWrite(Inst, TLI))
continue;
MemDepResult InstDep = MD->getDependency(Inst);
@@ -483,7 +525,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// in case we need it.
WeakVH NextInst(BBI);
- DeleteDeadInstruction(SI, *MD);
+ DeleteDeadInstruction(SI, *MD, TLI);
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
@@ -530,7 +572,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
// Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepWrite, *MD);
+ DeleteDeadInstruction(DepWrite, *MD, TLI);
++NumFastStores;
MadeChange = true;
@@ -627,7 +669,7 @@ bool DSE::HandleFree(CallInst *F) {
MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB);
while (Dep.isDef() || Dep.isClobber()) {
Instruction *Dependency = Dep.getInst();
- if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
+ if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency))
break;
Value *DepPointer =
@@ -640,7 +682,7 @@ bool DSE::HandleFree(CallInst *F) {
Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));
// DCE instructions only used to calculate that store
- DeleteDeadInstruction(Dependency, *MD);
+ DeleteDeadInstruction(Dependency, *MD, TLI);
++NumFastStores;
MadeChange = true;
@@ -659,6 +701,22 @@ bool DSE::HandleFree(CallInst *F) {
return MadeChange;
}
+namespace {
+ struct CouldRef {
+ typedef Value *argument_type;
+ const CallSite CS;
+ AliasAnalysis *AA;
+
+ bool operator()(Value *I) {
+ // See if the call site touches the value.
+ AliasAnalysis::ModRefResult A =
+ AA->getModRefInfo(CS, I, getPointerSize(I, *AA));
+
+ return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
+ }
+ };
+}
+
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
/// function end block. Ex:
/// %A = alloca i32
@@ -680,7 +738,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Okay, so these are dead heap objects, but if the pointer never escapes
// then it's leaked by this function anyways.
- else if (isAllocLikeFn(I) && !PointerMayBeCaptured(I, true, true))
+ else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true))
DeadStackObjects.insert(I);
}
@@ -696,7 +754,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
--BBI;
// If we find a store, check to see if it points into a dead stack value.
- if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
+ if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) {
// See through pointer-to-pointer bitcasts
SmallVector<Value *, 4> Pointers;
GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers);
@@ -724,7 +782,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
dbgs() << '\n');
// DCE instructions only used to calculate that store.
- DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
+ DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects);
++NumFastStores;
MadeChange = true;
continue;
@@ -732,9 +790,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
// Remove any dead non-memory-mutating instructions.
- if (isInstructionTriviallyDead(BBI)) {
+ if (isInstructionTriviallyDead(BBI, TLI)) {
Instruction *Inst = BBI++;
- DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
+ DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects);
++NumFastOther;
MadeChange = true;
continue;
@@ -750,7 +808,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (CallSite CS = cast<Value>(BBI)) {
// Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
- if (isAllocLikeFn(BBI))
+ if (isAllocLikeFn(BBI, TLI))
DeadStackObjects.remove(BBI);
// If this call does not access memory, it can't be loading any of our
@@ -760,20 +818,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// If the call might load from any of our allocas, then any store above
// the call is live.
- SmallVector<Value*, 8> LiveAllocas;
- for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(),
- E = DeadStackObjects.end(); I != E; ++I) {
- // See if the call site touches it.
- AliasAnalysis::ModRefResult A =
- AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
-
- if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
- LiveAllocas.push_back(*I);
- }
-
- for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
- E = LiveAllocas.end(); I != E; ++I)
- DeadStackObjects.remove(*I);
+ CouldRef Pred = { CS, AA };
+ DeadStackObjects.remove_if(Pred);
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
@@ -816,6 +862,20 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
return MadeChange;
}
+namespace {
+ struct CouldAlias {
+ typedef Value *argument_type;
+ const AliasAnalysis::Location &LoadedLoc;
+ AliasAnalysis *AA;
+
+ bool operator()(Value *I) {
+ // See if the loaded location could alias the stack location.
+ AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA));
+ return !AA->isNoAlias(StackLoc, LoadedLoc);
+ }
+ };
+}
+
/// RemoveAccessedObjects - Check to see if the specified location may alias any
/// of the stack objects in the DeadStackObjects set. If so, they become live
/// because the location is being loaded.
@@ -834,16 +894,7 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
return;
}
- SmallVector<Value*, 16> NowLive;
- for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(),
- E = DeadStackObjects.end(); I != E; ++I) {
- // See if the loaded location could alias the stack location.
- AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA));
- if (!AA->isNoAlias(StackLoc, LoadedLoc))
- NowLive.push_back(*I);
- }
-
- for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end();
- I != E; ++I)
- DeadStackObjects.remove(*I);
+ // Remove objects that could alias LoadedLoc.
+ CouldAlias Pred = { LoadedLoc, AA };
+ DeadStackObjects.remove_if(Pred);
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 9759549..101009d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -18,11 +18,12 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
#include <deque>
@@ -90,35 +91,56 @@ template<> struct DenseMapInfo<SimpleValue> {
unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
Instruction *Inst = Val.Inst;
-
// Hash in all of the operands as pointers.
- unsigned Res = 0;
- for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
- Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
+ if (BinaryOperator* BinOp = dyn_cast<BinaryOperator>(Inst)) {
+ Value *LHS = BinOp->getOperand(0);
+ Value *RHS = BinOp->getOperand(1);
+ if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
+ std::swap(LHS, RHS);
+
+ if (isa<OverflowingBinaryOperator>(BinOp)) {
+ // Hash the overflow behavior
+ unsigned Overflow =
+ BinOp->hasNoSignedWrap() * OverflowingBinaryOperator::NoSignedWrap |
+ BinOp->hasNoUnsignedWrap() * OverflowingBinaryOperator::NoUnsignedWrap;
+ return hash_combine(BinOp->getOpcode(), Overflow, LHS, RHS);
+ }
- if (CastInst *CI = dyn_cast<CastInst>(Inst))
- Res ^= getHash(CI->getType());
- else if (CmpInst *CI = dyn_cast<CmpInst>(Inst))
- Res ^= CI->getPredicate();
- else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst)) {
- for (ExtractValueInst::idx_iterator I = EVI->idx_begin(),
- E = EVI->idx_end(); I != E; ++I)
- Res ^= *I;
- } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst)) {
- for (InsertValueInst::idx_iterator I = IVI->idx_begin(),
- E = IVI->idx_end(); I != E; ++I)
- Res ^= *I;
- } else {
- // nothing extra to hash in.
- assert((isa<CallInst>(Inst) ||
- isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
- isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
- isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst)) &&
- "Invalid/unknown instruction");
+ return hash_combine(BinOp->getOpcode(), LHS, RHS);
}
+ if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
+ Value *LHS = CI->getOperand(0);
+ Value *RHS = CI->getOperand(1);
+ CmpInst::Predicate Pred = CI->getPredicate();
+ if (Inst->getOperand(0) > Inst->getOperand(1)) {
+ std::swap(LHS, RHS);
+ Pred = CI->getSwappedPredicate();
+ }
+ return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(Inst))
+ return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
+
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
+ return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
+ hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
+
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
+ return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
+ IVI->getOperand(1),
+ hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
+
+ assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
+ isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
+ isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
+ isa<ShuffleVectorInst>(Inst)) && "Invalid/unknown instruction");
+
// Mix in the opcode.
- return (Res << 1) ^ Inst->getOpcode();
+ return hash_combine(Inst->getOpcode(),
+ hash_combine_range(Inst->value_op_begin(),
+ Inst->value_op_end()));
}
bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
@@ -128,7 +150,41 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
return LHSI == RHSI;
if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
- return LHSI->isIdenticalTo(RHSI);
+ if (LHSI->isIdenticalTo(RHSI)) return true;
+
+ // If we're not strictly identical, we still might be a commutable instruction
+ if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
+ if (!LHSBinOp->isCommutative())
+ return false;
+
+ assert(isa<BinaryOperator>(RHSI)
+ && "same opcode, but different instruction type?");
+ BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
+
+ // Check overflow attributes
+ if (isa<OverflowingBinaryOperator>(LHSBinOp)) {
+ assert(isa<OverflowingBinaryOperator>(RHSBinOp)
+ && "same opcode, but different operator type?");
+ if (LHSBinOp->hasNoUnsignedWrap() != RHSBinOp->hasNoUnsignedWrap() ||
+ LHSBinOp->hasNoSignedWrap() != RHSBinOp->hasNoSignedWrap())
+ return false;
+ }
+
+ // Commuted equality
+ return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
+ LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
+ }
+ if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
+ assert(isa<CmpInst>(RHSI)
+ && "same opcode, but different instruction type?");
+ CmpInst *RHSCmp = cast<CmpInst>(RHSI);
+ // Commuted equality
+ return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
+ LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
+ LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
+ }
+
+ return false;
}
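Minimal sketch of the canonicalization idea behind the commutative cases above (plain std::hash here; the pass itself uses llvm::hash_combine): order the operands before hashing so commuted forms of the same operation land in the same bucket.

#include <cstddef>
#include <functional>
#include <utility>

std::size_t hashCommutativePair(const void *LHS, const void *RHS) {
  if (LHS > RHS)
    std::swap(LHS, RHS);                        // canonical operand order
  std::size_t H = std::hash<const void *>()(LHS);
  return (H * 31u) ^ std::hash<const void *>()(RHS);
}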
//===----------------------------------------------------------------------===//
@@ -216,7 +272,7 @@ namespace {
/// cases.
class EarlyCSE : public FunctionPass {
public:
- const TargetData *TD;
+ const DataLayout *TD;
const TargetLibraryInfo *TLI;
DominatorTree *DT;
typedef RecyclingAllocator<BumpPtrAllocator,
@@ -274,7 +330,8 @@ private:
CallScope(*availableCalls) {}
private:
- NodeScope(const NodeScope&); // DO NOT IMPLEMENT
+ NodeScope(const NodeScope&) LLVM_DELETED_FUNCTION;
+ void operator=(const NodeScope&) LLVM_DELETED_FUNCTION;
ScopedHTType::ScopeTy Scope;
LoadHTType::ScopeTy LoadScope;
@@ -313,7 +370,8 @@ private:
void process() { Processed = true; }
private:
- StackNode(const StackNode&); // DO NOT IMPLEMENT
+ StackNode(const StackNode&) LLVM_DELETED_FUNCTION;
+ void operator=(const StackNode&) LLVM_DELETED_FUNCTION;
// Members.
unsigned CurrentGeneration;
@@ -374,7 +432,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
Instruction *Inst = I++;
// Dead instructions should just be removed.
- if (isInstructionTriviallyDead(Inst)) {
+ if (isInstructionTriviallyDead(Inst, TLI)) {
DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
Inst->eraseFromParent();
Changed = true;
@@ -506,7 +564,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
bool EarlyCSE::runOnFunction(Function &F) {
std::deque<StackNode *> nodesToProcess;
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTree>();
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 4822fd0..f003e06 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -41,7 +41,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/PatternMatch.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -271,16 +271,16 @@ void ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
}
-uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
+uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
if (AA->doesNotAccessMemory(C)) {
Expression exp = create_expression(C);
- uint32_t& e = expressionNumbering[exp];
+ uint32_t &e = expressionNumbering[exp];
if (!e) e = nextValueNumber++;
valueNumbering[C] = e;
return e;
} else if (AA->onlyReadsMemory(C)) {
Expression exp = create_expression(C);
- uint32_t& e = expressionNumbering[exp];
+ uint32_t &e = expressionNumbering[exp];
if (!e) {
e = nextValueNumber++;
valueNumbering[C] = e;
@@ -413,7 +413,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
- case Instruction::Or :
+ case Instruction::Or:
case Instruction::Xor:
case Instruction::ICmp:
case Instruction::FCmp:
@@ -503,7 +503,7 @@ namespace {
bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
- const TargetData *TD;
+ const DataLayout *TD;
const TargetLibraryInfo *TLI;
ValueTable VN;
@@ -535,7 +535,7 @@ namespace {
InstrsToErase.push_back(I);
}
- const TargetData *getTargetData() const { return TD; }
+ const DataLayout *getDataLayout() const { return TD; }
DominatorTree &getDominatorTree() const { return *DT; }
AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
MemoryDependenceAnalysis &getMemDep() const { return *MD; }
@@ -632,6 +632,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "{\n";
for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
@@ -641,6 +642,7 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
}
errs() << "}\n";
}
+#endif
/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
@@ -728,7 +730,7 @@ SpeculationFailure:
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
Type *LoadTy,
- const TargetData &TD) {
+ const DataLayout &TD) {
// If the loaded or stored value is an first class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
@@ -744,7 +746,6 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
return true;
}
-
/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace and
@@ -754,7 +755,7 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
Type *LoadedTy,
Instruction *InsertPt,
- const TargetData &TD) {
+ const DataLayout &TD) {
if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
return 0;
@@ -767,24 +768,25 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
// Pointer to Pointer -> use bitcast.
- if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
+ if (StoredValTy->getScalarType()->isPointerTy() &&
+ LoadedTy->getScalarType()->isPointerTy())
return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
// Convert source pointers to integers, which can be bitcast.
- if (StoredValTy->isPointerTy()) {
- StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+ if (StoredValTy->getScalarType()->isPointerTy()) {
+ StoredValTy = TD.getIntPtrType(StoredValTy);
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
Type *TypeToCastTo = LoadedTy;
- if (TypeToCastTo->isPointerTy())
- TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
+ if (TypeToCastTo->getScalarType()->isPointerTy())
+ TypeToCastTo = TD.getIntPtrType(TypeToCastTo);
if (StoredValTy != TypeToCastTo)
StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
// Cast to pointer if the load needs a pointer type.
- if (LoadedTy->isPointerTy())
+ if (LoadedTy->getScalarType()->isPointerTy())
StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
return StoredVal;
@@ -796,8 +798,8 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
// Convert source pointers to integers, which can be manipulated.
- if (StoredValTy->isPointerTy()) {
- StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+ if (StoredValTy->getScalarType()->isPointerTy()) {
+ StoredValTy = TD.getIntPtrType(StoredValTy);
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
@@ -822,7 +824,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
return StoredVal;
// If the result is a pointer, inttoptr.
- if (LoadedTy->isPointerTy())
+ if (LoadedTy->getScalarType()->isPointerTy())
return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
// Otherwise, bitcast.
@@ -840,7 +842,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
Value *WritePtr,
uint64_t WriteSizeInBits,
- const TargetData &TD) {
+ const DataLayout &TD) {
// If the loaded or stored value is a first class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
if (LoadTy->isStructTy() || LoadTy->isArrayTy())
@@ -913,7 +915,7 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI,
- const TargetData &TD) {
+ const DataLayout &TD) {
// Cannot handle reading from store of first-class aggregate yet.
if (DepSI->getValueOperand()->getType()->isStructTy() ||
DepSI->getValueOperand()->getType()->isArrayTy())
@@ -929,7 +931,7 @@ static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
- LoadInst *DepLI, const TargetData &TD){
+ LoadInst *DepLI, const DataLayout &TD){
// Cannot handle reading from store of first-class aggregate yet.
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
return -1;
@@ -957,7 +959,7 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
MemIntrinsic *MI,
- const TargetData &TD) {
+ const DataLayout &TD) {
// If the mem operation is a non-constant size, we can't handle it.
ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
if (SizeCst == 0) return -1;
@@ -1007,7 +1009,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
/// before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
Type *LoadTy,
- Instruction *InsertPt, const TargetData &TD){
+ Instruction *InsertPt, const DataLayout &TD){
LLVMContext &Ctx = SrcVal->getType()->getContext();
uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
@@ -1017,8 +1019,9 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
- if (SrcVal->getType()->isPointerTy())
- SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx));
+ if (SrcVal->getType()->getScalarType()->isPointerTy())
+ SrcVal = Builder.CreatePtrToInt(SrcVal,
+ TD.getIntPtrType(SrcVal->getType()));
if (!SrcVal->getType()->isIntegerTy())
SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
@@ -1046,7 +1049,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
Type *LoadTy, Instruction *InsertPt,
GVN &gvn) {
- const TargetData &TD = *gvn.getTargetData();
+ const DataLayout &TD = *gvn.getDataLayout();
// If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
// widen SrcVal out to a larger load.
unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
@@ -1105,7 +1108,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Type *LoadTy, Instruction *InsertPt,
- const TargetData &TD){
+ const DataLayout &TD){
LLVMContext &Ctx = LoadTy->getContext();
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
@@ -1229,7 +1232,7 @@ struct AvailableValueInBlock {
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
- const TargetData *TD = gvn.getTargetData();
+ const DataLayout *TD = gvn.getDataLayout();
assert(TD && "Need target data to handle type mismatch case");
Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
*TD);
@@ -1251,7 +1254,7 @@ struct AvailableValueInBlock {
<< *Res << '\n' << "\n\n\n");
}
} else {
- const TargetData *TD = gvn.getTargetData();
+ const DataLayout *TD = gvn.getDataLayout();
assert(TD && "Need target data to handle type mismatch case");
Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
LoadTy, BB->getTerminator(), *TD);
@@ -1299,7 +1302,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
// If new PHI nodes were created, notify alias analysis.
- if (V->getType()->isPointerTy()) {
+ if (V->getType()->getScalarType()->isPointerTy()) {
AliasAnalysis *AA = gvn.getAliasAnalysis();
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
@@ -1436,7 +1439,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
Instruction *DepInst = DepInfo.getInst();
// Loading the allocation -> undef.
- if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst) ||
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
// Loading immediately after lifetime begin -> undef.
isLifetimeStart(DepInst)) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
@@ -1496,7 +1499,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
if (isa<PHINode>(V))
V->takeName(LI);
- if (V->getType()->isPointerTy())
+ if (V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(LI);
++NumGVNLoad;
@@ -1728,7 +1731,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
- if (V->getType()->isPointerTy())
+ if (V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(LI);
++NumPRELoad;
@@ -1855,7 +1858,7 @@ bool GVN::processLoad(LoadInst *L) {
// Replace the load!
L->replaceAllUsesWith(AvailVal);
- if (AvailVal->getType()->isPointerTy())
+ if (AvailVal->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(AvailVal);
markInstructionForDeletion(L);
++NumGVNLoad;
@@ -1912,7 +1915,7 @@ bool GVN::processLoad(LoadInst *L) {
// Remove it!
L->replaceAllUsesWith(StoredVal);
- if (StoredVal->getType()->isPointerTy())
+ if (StoredVal->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(StoredVal);
markInstructionForDeletion(L);
++NumGVNLoad;
@@ -1941,7 +1944,7 @@ bool GVN::processLoad(LoadInst *L) {
// Remove it!
patchAndReplaceAllUsesWith(AvailableVal, L);
- if (DepLI->getType()->isPointerTy())
+ if (DepLI->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
markInstructionForDeletion(L);
++NumGVNLoad;
@@ -1951,7 +1954,7 @@ bool GVN::processLoad(LoadInst *L) {
// If this load really doesn't depend on anything, then we must be loading an
// undef value. This can happen when loading for a fresh allocation with no
// intervening stores, for example.
- if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst)) {
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
markInstructionForDeletion(L);
++NumGVNLoad;
@@ -2182,7 +2185,7 @@ bool GVN::processInstruction(Instruction *I) {
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
I->replaceAllUsesWith(V);
- if (MD && V->getType()->isPointerTy())
+ if (MD && V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(I);
++NumGVNSimpl;
@@ -2231,12 +2234,20 @@ bool GVN::processInstruction(Instruction *I) {
Value *SwitchCond = SI->getCondition();
BasicBlock *Parent = SI->getParent();
bool Changed = false;
+
+ // Remember how many outgoing edges there are to every successor.
+ SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
+ for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i)
+ ++SwitchEdges[SI->getSuccessor(i)];
+
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
BasicBlock *Dst = i.getCaseSuccessor();
- BasicBlockEdge E(Parent, Dst);
- if (E.isSingleEdge())
+ // If there is only a single edge, propagate the case value into it.
+ if (SwitchEdges.lookup(Dst) == 1) {
+ BasicBlockEdge E(Parent, Dst);
Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
+ }
}
return Changed;
}
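The SwitchEdges map added above exists because propagateEquality is only valid along an edge that is not duplicated by another case label; counting the outgoing edges per successor up front makes that test O(1) per case. A standalone sketch of the counting idea (plain STL, illustrative names, not the LLVM API):

#include <map>
#include <vector>

// Successors as they appear on the terminator, one entry per case/default.
bool isSingleEdgeTo(const std::vector<int> &Successors, int Dst) {
  std::map<int, unsigned> EdgeCount;
  for (size_t i = 0; i != Successors.size(); ++i)
    ++EdgeCount[Successors[i]];
  // Only propagate the case value if exactly one edge reaches Dst.
  return EdgeCount[Dst] == 1;
}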
@@ -2274,7 +2285,7 @@ bool GVN::processInstruction(Instruction *I) {
// Remove it!
patchAndReplaceAllUsesWith(repl, I);
- if (MD && repl->getType()->isPointerTy())
+ if (MD && repl->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
markInstructionForDeletion(I);
return true;
@@ -2285,7 +2296,7 @@ bool GVN::runOnFunction(Function& F) {
if (!NoLoads)
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
TLI = &getAnalysis<TargetLibraryInfo>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
@@ -2522,7 +2533,7 @@ bool GVN::performPRE(Function &F) {
addToLeaderTable(ValNo, Phi, CurrentBlock);
Phi->setDebugLoc(CurInst->getDebugLoc());
CurInst->replaceAllUsesWith(Phi);
- if (Phi->getType()->isPointerTy()) {
+ if (Phi->getType()->getScalarType()->isPointerTy()) {
// Because we have added a PHI-use of the pointer value, it has now
// "escaped" from alias analysis' perspective. We need to inform
// AA of this.
diff --git a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
index b36a3cb..6301aad 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -62,7 +62,7 @@
#include "llvm/Intrinsics.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/ADT/Statistic.h"
@@ -98,9 +98,9 @@ namespace {
}
struct GlobalCmp {
- const TargetData *TD;
+ const DataLayout *TD;
- GlobalCmp(const TargetData *td) : TD(td) { }
+ GlobalCmp(const DataLayout *td) : TD(td) { }
bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
@@ -119,7 +119,7 @@ INITIALIZE_PASS(GlobalMerge, "global-merge",
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst) const {
- const TargetData *TD = TLI->getTargetData();
+ const DataLayout *TD = TLI->getDataLayout();
// FIXME: Infer the maximum possible offset depending on the actual users
// (these max offsets are different for the users inside Thumb or ARM
@@ -170,7 +170,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
bool GlobalMerge::doInitialization(Module &M) {
SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
- const TargetData *TD = TLI->getTargetData();
+ const DataLayout *TD = TLI->getDataLayout();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 37f8bdf..310fd61 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -43,7 +43,8 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -67,7 +68,8 @@ namespace {
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
- TargetData *TD;
+ DataLayout *TD;
+ TargetLibraryInfo *TLI;
SmallVector<WeakVH, 16> DeadInsts;
bool Changed;
@@ -218,8 +220,6 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
/// ConvertToSInt - Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
bool isExact = false;
- if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
- return false;
// See if we can convert this to an int64_t
uint64_t UIntVal;
if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
@@ -414,11 +414,11 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// new comparison.
NewCompare->takeName(Compare);
Compare->replaceAllUsesWith(NewCompare);
- RecursivelyDeleteTriviallyDeadInstructions(Compare);
+ RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
// Delete the old floating point increment.
Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
- RecursivelyDeleteTriviallyDeadInstructions(Incr);
+ RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
// If the FP induction variable still has uses, this is because something else
// in the loop uses its value. In order to canonicalize the induction
@@ -431,7 +431,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
PN->getParent()->getFirstInsertionPt());
PN->replaceAllUsesWith(Conv);
- RecursivelyDeleteTriviallyDeadInstructions(PN);
+ RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
}
Changed = true;
}
@@ -549,15 +549,17 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
PN->setIncomingValue(i, ExitVal);
- // If this instruction is dead now, delete it.
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ // If this instruction is dead now, queue it for deletion instead of
+ // deleting it immediately, to avoid invalidating iterators.
+ if (isInstructionTriviallyDead(Inst, TLI))
+ DeadInsts.push_back(Inst);
if (NumPreds == 1) {
// Completely replace a single-pred PHI. This is safe, because the
// NewVal won't be variant in the loop, so we don't need an LCSSA phi
// node anymore.
PN->replaceAllUsesWith(ExitVal);
- RecursivelyDeleteTriviallyDeadInstructions(PN);
+ PN->eraseFromParent();
}
}
if (NumPreds != 1) {
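The rewrite above defers deletion to the DeadInsts worklist (drained later in runOnLoop) because deleting instructions while the surrounding loop is still walking users and incoming values can invalidate its iterators. A simplified, non-LLVM sketch of the defer-then-delete pattern:

#include <vector>

struct Inst { bool Dead; };

void rewriteThenClean(std::vector<Inst> &Insts) {
  std::vector<size_t> DeadInsts;
  for (size_t i = 0; i != Insts.size(); ++i)
    if (Insts[i].Dead)
      DeadInsts.push_back(i);              // record only; the walk goes on
  // Erase after the walk, back to front so earlier indices stay valid.
  for (size_t j = DeadInsts.size(); j != 0; --j)
    Insts.erase(Insts.begin() + DeadInsts[j - 1]);
}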
@@ -595,13 +597,13 @@ namespace {
class WideIVVisitor : public IVVisitor {
ScalarEvolution *SE;
- const TargetData *TD;
+ const DataLayout *TD;
public:
WideIVInfo WI;
WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
- const TargetData *TData) :
+ const DataLayout *TData) :
SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
// Implement the interface used by simplifyUsersOfIV.
@@ -1259,8 +1261,13 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) {
if (!Phi)
return true;
+ // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
+ int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
+ if (Idx < 0)
+ return true;
+
// Do LFTR if the exit condition's IV is *not* a simple counter.
- Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch());
+ Value *IncV = Phi->getIncomingValue(Idx);
return Phi != getLoopPhiForCounter(IncV, L, DT);
}
@@ -1339,7 +1346,7 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
/// could at least handle constant BECounts.
static PHINode *
FindLoopCounter(Loop *L, const SCEV *BECount,
- ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
+ ScalarEvolution *SE, DominatorTree *DT, const DataLayout *TD) {
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
Value *Cond =
@@ -1696,7 +1703,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTree>();
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
DeadInsts.clear();
Changed = false;
@@ -1763,7 +1771,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
while (!DeadInsts.empty())
if (Instruction *Inst =
dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
// The Rewriter may not be used from this point on.
@@ -1772,7 +1780,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
SinkUnusedInvariants(L);
// Clean up dead instructions.
- Changed |= DeleteDeadPHIs(L->getHeader());
+ Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
// Check a post-condition.
assert(L->isLCSSAForm(*DT) &&
"Indvars did not leave the loop in lcssa form!");
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index dd42c59..e7ffa09 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -23,7 +23,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -75,7 +75,7 @@ namespace {
/// revectored to the false side of the second if.
///
class JumpThreading : public FunctionPass {
- TargetData *TD;
+ DataLayout *TD;
TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
#ifdef NDEBUG
@@ -147,7 +147,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
///
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
@@ -1455,7 +1455,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
- SimplifyInstructionsInBlock(NewBB, TD);
+ SimplifyInstructionsInBlock(NewBB, TD, TLI);
// Threaded an edge!
++NumThreads;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 0192e92..4818437 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -46,7 +46,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
@@ -100,7 +100,7 @@ namespace {
LoopInfo *LI; // Current LoopInfo
DominatorTree *DT; // Dominator Tree for the current Loop.
- TargetData *TD; // TargetData for constant folding.
+ DataLayout *TD; // DataLayout for constant folding.
TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
// State that is updated as we process loops.
@@ -108,6 +108,9 @@ namespace {
BasicBlock *Preheader; // The preheader block of the current loop...
Loop *CurLoop; // The current loop we are working on...
AliasSetTracker *CurAST; // AliasSet information for the current loop...
+ bool MayThrow; // The current loop contains an instruction which
+ // may throw, thus preventing code motion of
+ // instructions with side effects.
DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
@@ -204,7 +207,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
TLI = &getAnalysis<TargetLibraryInfo>();
CurAST = new AliasSetTracker(*AA);
@@ -240,6 +243,15 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
CurAST->add(*BB); // Incorporate the specified basic block
}
+ MayThrow = false;
+ // TODO: We've already searched for instructions which may throw in subloops.
+ // We may want to reuse this information.
+ for (Loop::block_iterator BB = L->block_begin(), BBE = L->block_end();
+ (BB != BBE) && !MayThrow ; ++BB)
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
+ (I != E) && !MayThrow; ++I)
+ MayThrow |= I->mayThrow();
+
// We want to visit all of the instructions in this loop... that are not parts
// of our subloops (they have already had their invariants hoisted out of
// their loop, into this loop, so there is no need to process the BODIES of
@@ -307,7 +319,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
// If the instruction is dead, we would try to sink it because it isn't used
// in the loop, instead, just delete it.
- if (isInstructionTriviallyDead(&I)) {
+ if (isInstructionTriviallyDead(&I, TLI)) {
DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
++II;
CurAST->deleteValue(&I);
@@ -418,17 +430,22 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
if (!FoundMod) return true;
}
- // FIXME: This should use mod/ref information to see if we can hoist or sink
- // the call.
+ // FIXME: This should use mod/ref information to see if we can hoist or
+ // sink the call.
return false;
}
- // Otherwise these instructions are hoistable/sinkable
- return isa<BinaryOperator>(I) || isa<CastInst>(I) ||
- isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
- isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
- isa<ShuffleVectorInst>(I);
+ // Only these instructions are hoistable/sinkable.
+ bool HoistableKind = (isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
+ isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I));
+ if (!HoistableKind)
+ return false;
+
+ return isSafeToExecuteUnconditionally(I);
}
/// isNotUsedInLoop - Return true if the only users of this instruction are
@@ -604,6 +621,12 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
}
bool LICM::isGuaranteedToExecute(Instruction &Inst) {
+
+ // Somewhere in this loop there is an instruction which may throw and make us
+ // exit the loop.
+ if (MayThrow)
+ return false;
+
// Otherwise we have to check to make sure that the instruction dominates all
// of the exit blocks. If it doesn't, then there is a path out of the loop
// which does not execute this instruction, so we can't hoist it.
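The new MayThrow flag is computed once per loop (see the scan added to runOnLoop above) and consulted here: if any instruction in the loop may throw, the body is not guaranteed to execute in full, so hoisting side-effecting instructions out of it is unsafe. A simplified sketch of the one-pass scan with early exit, using plain containers in place of Loop/BasicBlock:

#include <vector>

struct Inst { bool CanThrow; };
typedef std::vector<Inst> Block;

// Stop at the first throwing instruction; the result is cached for the
// whole LICM run on this loop rather than recomputed per candidate.
bool loopMayThrow(const std::vector<Block> &LoopBlocks) {
  for (size_t b = 0; b != LoopBlocks.size(); ++b)
    for (size_t i = 0; i != LoopBlocks[b].size(); ++i)
      if (LoopBlocks[b][i].CanThrow)
        return true;
  return false;
}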
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index ac1082c..a44e798 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -54,7 +54,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -65,7 +65,7 @@ STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
namespace {
class LoopIdiomRecognize : public LoopPass {
Loop *CurLoop;
- const TargetData *TD;
+ const DataLayout *TD;
DominatorTree *DT;
ScalarEvolution *SE;
TargetLibraryInfo *TLI;
@@ -132,7 +132,8 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
/// and zero out all the operands of this instruction. If any of them become
/// dead, delete them and the computation tree that feeds them.
///
-static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
+static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
+ const TargetLibraryInfo *TLI) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
@@ -153,7 +154,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
if (!Op->use_empty()) continue;
if (Instruction *OpI = dyn_cast<Instruction>(Op))
- if (isInstructionTriviallyDead(OpI))
+ if (isInstructionTriviallyDead(OpI, TLI))
NowDeadInsts.push_back(OpI);
}
@@ -164,15 +165,21 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
/// deleteIfDeadInstruction - If the specified value is a dead instruction,
/// delete it and any recursively used instructions.
-static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
+static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
+ const TargetLibraryInfo *TLI) {
if (Instruction *I = dyn_cast<Instruction>(V))
- if (isInstructionTriviallyDead(I))
- deleteDeadInstruction(I, SE);
+ if (isInstructionTriviallyDead(I, TLI))
+ deleteDeadInstruction(I, SE, TLI);
}
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
CurLoop = L;
+ // If the loop could not be converted to canonical form, it must have an
+ // indirectbr in it, just give up.
+ if (!L->getLoopPreheader())
+ return false;
+
// Disable loop idiom recognition if the function's name is a common idiom.
StringRef Name = L->getHeader()->getParent()->getName();
if (Name == "memset" || Name == "memcpy")
@@ -192,7 +199,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
// We require target data for now.
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
if (TD == 0) return false;
DT = &getAnalysis<DominatorTree>();
@@ -401,7 +408,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
///
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
-static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
+static Constant *getMemSetPatternValue(Value *V, const DataLayout &TD) {
// If the value isn't a constant, we can't promote it to being in a constant
// array. We could theoretically do a store to an alloca or something, but
// that doesn't seem worthwhile.
@@ -490,7 +497,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(BasePtr, *SE);
+ deleteIfDeadInstruction(BasePtr, *SE, TLI);
return false;
}
@@ -538,7 +545,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- deleteDeadInstruction(TheStore, *SE);
+ deleteDeadInstruction(TheStore, *SE, TLI);
++NumMemSet;
return true;
}
@@ -579,7 +586,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
getAnalysis<AliasAnalysis>(), SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(StoreBasePtr, *SE);
+ deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
return false;
}
@@ -594,8 +601,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(LoadBasePtr, *SE);
- deleteIfDeadInstruction(StoreBasePtr, *SE);
+ deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
+ deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
return false;
}
@@ -628,7 +635,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
- deleteDeadInstruction(SI, *SE);
+ deleteDeadInstruction(SI, *SE, TLI);
++NumMemCpy;
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 982400c..558f62e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -18,7 +18,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -66,7 +66,7 @@ Pass *llvm::createLoopInstSimplifyPass() {
bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
LoopInfo *LI = &getAnalysis<LoopInfo>();
- const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallVector<BasicBlock*, 8> ExitBlocks;
@@ -120,7 +120,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
++NumSimplified;
}
}
- LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I);
+ LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
if (IsSubloopHeader && !isa<PHINode>(I))
break;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 7eeb152..abe07aa 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -256,6 +257,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
return false;
BasicBlock *OrigHeader = L->getHeader();
+ BasicBlock *OrigLatch = L->getLoopLatch();
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
if (BI == 0 || BI->isUnconditional())
@@ -267,13 +269,9 @@ bool LoopRotate::rotateLoop(Loop *L) {
if (!L->isLoopExiting(OrigHeader))
return false;
- // Updating PHInodes in loops with multiple exits adds complexity.
- // Keep it simple, and restrict loop rotation to loops with one exit only.
- // In future, lift this restriction and support for multiple exits if
- // required.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- if (ExitBlocks.size() > 1)
+ // If the loop latch already contains a branch that leaves the loop then the
+ // loop is already rotated.
+ if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
return false;
// Check size of original header and reject loop if it is very big.
@@ -286,11 +284,10 @@ bool LoopRotate::rotateLoop(Loop *L) {
// Now, this loop is suitable for rotation.
BasicBlock *OrigPreheader = L->getLoopPreheader();
- BasicBlock *OrigLatch = L->getLoopLatch();
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
- if (OrigPreheader == 0 || OrigLatch == 0)
+ if (OrigPreheader == 0)
return false;
// Anything ScalarEvolution may know about this loop or the PHI nodes
@@ -298,6 +295,8 @@ bool LoopRotate::rotateLoop(Loop *L) {
if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
SE->forgetLoop(L);
+ DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
+
// Find new Loop header. NewHeader is a Header's one and only successor
// that is inside loop. Header's other successor is outside the
// loop. Otherwise loop is not suitable for rotation.
@@ -408,10 +407,19 @@ bool LoopRotate::rotateLoop(Loop *L) {
// Update DominatorTree to reflect the CFG change we just made. Then split
// edges as necessary to preserve LoopSimplify form.
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
- // Since OrigPreheader now has the conditional branch to Exit block, it is
- // the dominator of Exit.
- DT->changeImmediateDominator(Exit, OrigPreheader);
- DT->changeImmediateDominator(NewHeader, OrigPreheader);
+ // Everything that was dominated by the old loop header is now dominated
+ // by the original loop preheader. Conceptually the header was merged
+ // into the preheader, even though we reuse the actual block as a new
+ // loop latch.
+ DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
+ SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
+ OrigHeaderNode->end());
+ DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader);
+ for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
+ DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
+
+ assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode);
+ assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode);
// Update OrigHeader to be dominated by the new header block.
DT->changeImmediateDominator(OrigHeader, OrigLatch);
@@ -440,6 +448,35 @@ bool LoopRotate::rotateLoop(Loop *L) {
// Update OrigHeader to be dominated by the new header block.
DT->changeImmediateDominator(NewHeader, OrigPreheader);
DT->changeImmediateDominator(OrigHeader, OrigLatch);
+
+ // Brute force incremental dominator tree update. Call
+ // findNearestCommonDominator on all CFG predecessors of each child of the
+ // original header.
+ DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
+ SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
+ OrigHeaderNode->end());
+ bool Changed;
+ do {
+ Changed = false;
+ for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) {
+ DomTreeNode *Node = HeaderChildren[I];
+ BasicBlock *BB = Node->getBlock();
+
+ pred_iterator PI = pred_begin(BB);
+ BasicBlock *NearestDom = *PI;
+ for (pred_iterator PE = pred_end(BB); PI != PE; ++PI)
+ NearestDom = DT->findNearestCommonDominator(NearestDom, *PI);
+
+ // Remember if this changes the DomTree.
+ if (Node->getIDom()->getBlock() != NearestDom) {
+ DT->changeImmediateDominator(BB, NearestDom);
+ Changed = true;
+ }
+ }
+
+ // If the dominator changed, this may have an effect on other
+ // predecessors; continue until we reach a fixpoint.
+ } while (Changed);
}
}
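The brute-force update above repeatedly sets each affected block's immediate dominator to the nearest common dominator of its CFG predecessors until nothing changes. A toy, self-contained version of the nearest-common-dominator step, with the dominator tree encoded as parent indices (node 0 is the entry):

#include <vector>

static int depthOf(const std::vector<int> &Idom, int N) {
  int D = 0;
  while (N != 0) { N = Idom[N]; ++D; }
  return D;
}

// Walk both nodes up to the same depth, then climb in lockstep until they
// meet; the meeting point dominates every path into either block.
int nearestCommonDominator(const std::vector<int> &Idom, int A, int B) {
  int DA = depthOf(Idom, A), DB = depthOf(Idom, B);
  while (DA > DB) { A = Idom[A]; --DA; }
  while (DB > DA) { B = Idom[B]; --DB; }
  while (A != B) { A = Idom[A]; B = Idom[B]; }
  return A;
}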
@@ -452,6 +489,8 @@ bool LoopRotate::rotateLoop(Loop *L) {
// emitted code isn't too gross in this common case.
MergeBlockIntoPredecessor(OrigHeader, this);
+ DEBUG(dbgs() << "LoopRotation: into "; L->dump());
+
++NumRotated;
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b14a713..958348d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -54,7 +54,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-reduce"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/AddressingMode.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
@@ -64,6 +64,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -121,9 +122,11 @@ void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void RegSortData::dump() const {
print(errs()); errs() << '\n';
}
+#endif
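The guard added around dump() here (and repeated below for Formula, Cost, LSRFixup, LSRUse, WorkItem and LSRInstance) keeps the printing helpers out of release builds unless LLVM_ENABLE_DUMP is defined explicitly. A minimal sketch of the pattern outside of LLVM:

#include <cstdio>

struct RegSortData {
  void print() const { std::printf("[NumUses=...]\n"); }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // Compiled only into assert-enabled or explicitly dump-enabled builds.
  void dump() const { print(); }
#endif
};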
namespace {
@@ -223,7 +226,7 @@ namespace {
struct Formula {
/// AM - This is used to represent complex addressing, as well as other kinds
/// of interesting uses.
- TargetLowering::AddrMode AM;
+ AddrMode AM;
/// BaseRegs - The list of "base" registers for this use. When this is
/// non-empty, AM.HasBaseReg should be set to true.
@@ -414,9 +417,11 @@ void Formula::print(raw_ostream &OS) const {
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// isAddRecSExtable - Return true if the given addrec can be sign-extended
/// without changing its value.
@@ -738,7 +743,8 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
bool Changed = false;
while (!DeadInsts.empty()) {
- Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val());
+ Value *V = DeadInsts.pop_back_val();
+ Instruction *I = dyn_cast_or_null<Instruction>(V);
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
@@ -973,9 +979,11 @@ void Cost::print(raw_ostream &OS) const {
OS << ", plus " << SetupCost << " setup cost";
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Cost::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
@@ -1059,9 +1067,11 @@ void LSRFixup::print(raw_ostream &OS) const {
OS << ", Offset=" << Offset;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRFixup::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
@@ -1251,14 +1261,16 @@ void LSRUse::print(raw_ostream &OS) const {
OS << ", widest fixup type: " << *WidestFixupType;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRUse::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
/// be completely folded into the user instruction at isel time. This includes
/// address-mode folding and special icmp tricks.
-static bool isLegalUse(const TargetLowering::AddrMode &AM,
+static bool isLegalUse(const AddrMode &AM,
LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
switch (Kind) {
@@ -1315,7 +1327,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
llvm_unreachable("Invalid LSRUse Kind!");
}
-static bool isLegalUse(TargetLowering::AddrMode AM,
+static bool isLegalUse(AddrMode AM,
int64_t MinOffset, int64_t MaxOffset,
LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
@@ -1346,7 +1358,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
// Conservatively, create an address with an immediate and a
// base and a scale.
- TargetLowering::AddrMode AM;
+ AddrMode AM;
AM.BaseOffs = BaseOffs;
AM.BaseGV = BaseGV;
AM.HasBaseReg = HasBaseReg;
@@ -1384,7 +1396,7 @@ static bool isAlwaysFoldable(const SCEV *S,
// Conservatively, create an address with an immediate and a
// base and a scale.
- TargetLowering::AddrMode AM;
+ AddrMode AM;
AM.BaseOffs = BaseOffs;
AM.BaseGV = BaseGV;
AM.HasBaseReg = HasBaseReg;
@@ -2009,7 +2021,7 @@ LSRInstance::OptimizeLoopTermCond() {
goto decline_post_inc;
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
- TargetLowering::AddrMode AM;
+ AddrMode AM;
AM.Scale = C->getSExtValue();
if (TLI->isLegalAddressingMode(AM, AccessTy))
goto decline_post_inc;
@@ -3435,9 +3447,11 @@ void WorkItem::print(raw_ostream &OS) const {
<< " , add offset " << Imm;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
/// distance apart and try to form reuse opportunities between them.
@@ -4451,17 +4465,21 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs);
NewBB = NewBBs[0];
}
-
- // If PN is outside of the loop and BB is in the loop, we want to
- // move the block to be immediately before the PHI block, not
- // immediately after BB.
- if (L->contains(BB) && !L->contains(PN))
- NewBB->moveBefore(PN->getParent());
-
- // Splitting the edge can reduce the number of PHI entries we have.
- e = PN->getNumIncomingValues();
- BB = NewBB;
- i = PN->getBasicBlockIndex(BB);
+ // If NewBB==NULL, then SplitCriticalEdge refused to split because all
+ // phi predecessors are identical. The simple thing to do is skip
+ // splitting in this case rather than complicate the API.
+ if (NewBB) {
+ // If PN is outside of the loop and BB is in the loop, we want to
+ // move the block to be immediately before the PHI block, not
+ // immediately after BB.
+ if (L->contains(BB) && !L->contains(PN))
+ NewBB->moveBefore(PN->getParent());
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
+ BB = NewBB;
+ i = PN->getBasicBlockIndex(BB);
+ }
}
}
@@ -4730,9 +4748,11 @@ void LSRInstance::print(raw_ostream &OS) const {
print_uses(OS);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 09a186f..0d781ac 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -22,7 +22,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include <climits>
using namespace llvm;
@@ -113,7 +113,7 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
- const TargetData *TD) {
+ const DataLayout *TD) {
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
@@ -145,7 +145,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// not user specified.
unsigned Threshold = CurrentThreshold;
if (!UserThreshold &&
- Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
+ Header->getParent()->getFnAttributes().
+ hasAttribute(Attributes::OptimizeForSize))
Threshold = OptSizeUnrollThreshold;
// Find trip count and trip multiple if count is not available
@@ -178,7 +179,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Enforce the threshold.
if (Threshold != NoThreshold) {
- const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
unsigned NumInlineCandidates;
unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, TD);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 58f7739..047b43e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -638,7 +638,8 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
// Check to see if it would be profitable to unswitch current loop.
// Do not do non-trivial unswitch while optimizing for size.
- if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize))
+ if (OptimizeForSize ||
+ F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize))
return false;
UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
@@ -906,13 +907,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
/// specified.
static void RemoveFromWorklist(Instruction *I,
std::vector<Instruction*> &Worklist) {
- std::vector<Instruction*>::iterator WI = std::find(Worklist.begin(),
- Worklist.end(), I);
- while (WI != Worklist.end()) {
- unsigned Offset = WI-Worklist.begin();
- Worklist.erase(WI);
- WI = std::find(Worklist.begin()+Offset, Worklist.end(), I);
- }
+
+ Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), I),
+ Worklist.end());
}
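The rewritten RemoveFromWorklist is the standard erase-remove idiom: std::remove compacts the surviving elements to the front of the vector and erase drops the tail, so every copy of I disappears in one linear pass instead of repeated find/erase. A self-contained version:

#include <algorithm>
#include <vector>

void removeAllOccurrences(std::vector<int> &Worklist, int I) {
  Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), I),
                 Worklist.end());
}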
/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2a5ee33..517657cf 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -27,7 +27,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include <list>
@@ -38,8 +38,8 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
-static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
- bool &VariableIdxFound, const TargetData &TD){
+static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
+ bool &VariableIdxFound, const DataLayout &TD){
// Skip over the first indices.
gep_type_iterator GTI = gep_type_begin(GEP);
for (unsigned i = 1; i != Idx; ++i, ++GTI)
@@ -72,11 +72,11 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
/// constant offset, and return that constant offset. For example, Ptr1 might
/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
- const TargetData &TD) {
+ const DataLayout &TD) {
Ptr1 = Ptr1->stripPointerCasts();
Ptr2 = Ptr2->stripPointerCasts();
- GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
- GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
+ GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
+ GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
bool VariableIdxFound = false;
@@ -141,12 +141,12 @@ struct MemsetRange {
/// TheStores - The actual stores that make up this range.
SmallVector<Instruction*, 16> TheStores;
- bool isProfitableToUseMemset(const TargetData &TD) const;
+ bool isProfitableToUseMemset(const DataLayout &TD) const;
};
} // end anon namespace
-bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
+bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
// If we found more than 4 stores to merge or 16 bytes, use memset.
if (TheStores.size() >= 4 || End-Start >= 16) return true;
@@ -192,9 +192,9 @@ class MemsetRanges {
/// because each element is relatively large and expensive to copy.
std::list<MemsetRange> Ranges;
typedef std::list<MemsetRange>::iterator range_iterator;
- const TargetData &TD;
+ const DataLayout &TD;
public:
- MemsetRanges(const TargetData &td) : TD(td) {}
+ MemsetRanges(const DataLayout &td) : TD(td) {}
typedef std::list<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
@@ -302,7 +302,7 @@ namespace {
class MemCpyOpt : public FunctionPass {
MemoryDependenceAnalysis *MD;
TargetLibraryInfo *TLI;
- const TargetData *TD;
+ const DataLayout *TD;
public:
static char ID; // Pass identification, replacement for typeid
MemCpyOpt() : FunctionPass(ID) {
@@ -332,7 +332,7 @@ namespace {
bool processMemCpy(MemCpyInst *M);
bool processMemMove(MemMoveInst *M);
bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
- uint64_t cpyLen, CallInst *C);
+ uint64_t cpyLen, unsigned cpyAlign, CallInst *C);
bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
uint64_t MSize);
bool processByValArgument(CallSite CS, unsigned ArgNo);
@@ -509,10 +509,18 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
}
if (C) {
+ unsigned storeAlign = SI->getAlignment();
+ if (!storeAlign)
+ storeAlign = TD->getABITypeAlignment(SI->getOperand(0)->getType());
+ unsigned loadAlign = LI->getAlignment();
+ if (!loadAlign)
+ loadAlign = TD->getABITypeAlignment(LI->getType());
+
bool changed = performCallSlotOptzn(LI,
SI->getPointerOperand()->stripPointerCasts(),
LI->getPointerOperand()->stripPointerCasts(),
- TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
+ TD->getTypeStoreSize(SI->getOperand(0)->getType()),
+ std::min(storeAlign, loadAlign), C);
if (changed) {
MD->removeInstruction(SI);
SI->eraseFromParent();
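The call-slot transform now also receives an alignment: a zero alignment on the load or store means "use the ABI alignment of the accessed type", and the destination may only be assumed to be aligned to the smaller of the two. A simplified sketch of that computation, using plain unsigned values in place of LLVM types:

#include <algorithm>

unsigned effectiveAlign(unsigned InstAlign, unsigned ABIAlign) {
  // An explicit 0 means the instruction relies on the type's ABI alignment.
  return InstAlign ? InstAlign : ABIAlign;
}

unsigned callSlotCopyAlign(unsigned StoreAlign, unsigned StoreABIAlign,
                           unsigned LoadAlign, unsigned LoadABIAlign) {
  return std::min(effectiveAlign(StoreAlign, StoreABIAlign),
                  effectiveAlign(LoadAlign, LoadABIAlign));
}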
@@ -559,7 +567,8 @@ bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
Value *cpyDest, Value *cpySrc,
- uint64_t cpyLen, CallInst *C) {
+ uint64_t cpyLen, unsigned cpyAlign,
+ CallInst *C) {
// The general transformation to keep in mind is
//
// call @func(..., src, ...)
@@ -625,6 +634,16 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
}
+ // Check that dest points to memory that is at least as aligned as src.
+ unsigned srcAlign = srcAlloca->getAlignment();
+ if (!srcAlign)
+ srcAlign = TD->getABITypeAlignment(srcAlloca->getAllocatedType());
+ bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
+ // If dest is not aligned enough and we can't increase its alignment then
+ // bail out.
+ if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
+ return false;
+
// Check that src is not accessed except via the call and the memcpy. This
// guarantees that it holds only undefined values when passed in (so the final
// memcpy can be dropped), that it is not read or written between the call and
@@ -673,20 +692,26 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
bool changedArgument = false;
for (unsigned i = 0; i < CS.arg_size(); ++i)
if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
- if (cpySrc->getType() != cpyDest->getType())
- cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
- cpyDest->getName(), C);
+ Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
+ : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
+ cpyDest->getName(), C);
changedArgument = true;
- if (CS.getArgument(i)->getType() == cpyDest->getType())
- CS.setArgument(i, cpyDest);
+ if (CS.getArgument(i)->getType() == Dest->getType())
+ CS.setArgument(i, Dest);
else
- CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
- CS.getArgument(i)->getType(), cpyDest->getName(), C));
+ CS.setArgument(i, CastInst::CreatePointerCast(Dest,
+ CS.getArgument(i)->getType(), Dest->getName(), C));
}
if (!changedArgument)
return false;
+ // If the destination wasn't sufficiently aligned then increase its alignment.
+ if (!isDestSufficientlyAligned) {
+ assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
+ cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
+ }
+
// Drop any cached information about the call, because we may have changed
// its dependence information by changing its parameter.
MD->removeInstruction(C);
@@ -813,7 +838,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
if (DepInfo.isClobber()) {
if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), C)) {
+ CopySize->getZExtValue(), M->getAlignment(),
+ C)) {
MD->removeInstruction(M);
M->eraseFromParent();
return true;
@@ -974,7 +1000,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
bool MemCpyOpt::runOnFunction(Function &F) {
bool MadeChange = false;
MD = &getAnalysis<MemoryDependenceAnalysis>();
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
TLI = &getAnalysis<TargetLibraryInfo>();
// If we don't have at least memset and memcpy, there is little point of doing
diff --git a/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp b/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
index 3222f20..dfdf505 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ObjCARC.cpp
@@ -29,6 +29,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "objc-arc"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/DenseMap.h"
using namespace llvm;
@@ -1120,9 +1121,8 @@ namespace {
bool relatedSelect(const SelectInst *A, const Value *B);
bool relatedPHI(const PHINode *A, const Value *B);
- // Do not implement.
- void operator=(const ProvenanceAnalysis &);
- ProvenanceAnalysis(const ProvenanceAnalysis &);
+ void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
+ ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
public:
ProvenanceAnalysis() {}
@@ -1236,16 +1236,19 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
// An ObjC-Identified object can't alias a load if it is never locally stored.
if (AIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(B))
+ return isStoredObjCPointer(A);
if (BIsIdentified) {
- // If both pointers have provenance, they can be directly compared.
- if (A != B)
- return false;
- } else {
- if (isa<LoadInst>(B))
- return isStoredObjCPointer(A);
+ // Check for an obvious escape.
+ if (isa<LoadInst>(A))
+ return isStoredObjCPointer(B);
+ // Both pointers are identified and escapes aren't an evident problem.
+ return false;
}
- } else {
- if (BIsIdentified && isa<LoadInst>(A))
+ } else if (BIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(A))
return isStoredObjCPointer(B);
}
@@ -1381,9 +1384,6 @@ namespace {
/// PtrState - This class summarizes several per-pointer runtime properties
/// which are propagated through the flow graph.
class PtrState {
- /// NestCount - The known minimum level of retain+release nesting.
- unsigned NestCount;
-
/// KnownPositiveRefCount - True if the reference count is known to
/// be incremented.
bool KnownPositiveRefCount;
@@ -1401,7 +1401,7 @@ namespace {
/// TODO: Encapsulate this better.
RRInfo RRI;
- PtrState() : NestCount(0), KnownPositiveRefCount(false), Partial(false),
+ PtrState() : KnownPositiveRefCount(false), Partial(false),
Seq(S_None) {}
void SetKnownPositiveRefCount() {
@@ -1416,18 +1416,6 @@ namespace {
return KnownPositiveRefCount;
}
- void IncrementNestCount() {
- if (NestCount != UINT_MAX) ++NestCount;
- }
-
- void DecrementNestCount() {
- if (NestCount != 0) --NestCount;
- }
-
- bool IsKnownNested() const {
- return NestCount > 0;
- }
-
void SetSeq(Sequence NewSeq) {
Seq = NewSeq;
}
@@ -1454,7 +1442,6 @@ void
PtrState::Merge(const PtrState &Other, bool TopDown) {
Seq = MergeSeqs(Seq, Other.Seq, TopDown);
KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
- NestCount = std::min(NestCount, Other.NestCount);
// We can't merge a plain objc_retain with an objc_retainBlock.
if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
@@ -1610,6 +1597,12 @@ void BBState::MergePred(const BBState &Other) {
// loop backedge. Loop backedges are special.
TopDownPathCount += Other.TopDownPathCount;
+ // Check for overflow. If we have overflow, fall back to conservative behavior.
+ if (TopDownPathCount < Other.TopDownPathCount) {
+ clearTopDownPointers();
+ return;
+ }
+
// For each entry in the other set, if our set has an entry with the same key,
// merge the entries. Otherwise, copy the entry and merge it with an empty
// entry.
@@ -1635,6 +1628,12 @@ void BBState::MergeSucc(const BBState &Other) {
// loop backedge. Loop backedges are special.
BottomUpPathCount += Other.BottomUpPathCount;
+ // Check for overflow. If we have overflow, fall back to conservative behavior.
+ if (BottomUpPathCount < Other.BottomUpPathCount) {
+ clearBottomUpPointers();
+ return;
+ }
+
// For each entry in the other set, if our set has an entry with the
// same key, merge the entries. Otherwise, copy the entry and merge
// it with an empty entry.
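Both merge paths now guard the path-count addition against unsigned wraparound; after an unsigned add, overflow has occurred exactly when the sum is smaller than one of the addends, in which case the pass clears its per-pointer state and falls back to conservative behavior. The detection in isolation:

// Returns true if PathCount + Other wrapped around (caller then falls back
// to a conservative state, e.g. by clearing its per-pointer maps).
bool addPathCount(unsigned &PathCount, unsigned Other) {
  PathCount += Other;
  return PathCount < Other;   // wrapped past UINT_MAX
}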
@@ -1789,7 +1788,9 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *Params[] = { I8X };
FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
RetainRVCallee =
M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
Attributes);
@@ -1803,7 +1804,9 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *Params[] = { I8X };
FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
AutoreleaseRVCallee =
M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
Attributes);
@@ -1815,7 +1818,9 @@ Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
if (!ReleaseCallee) {
LLVMContext &C = M->getContext();
Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
ReleaseCallee =
M->getOrInsertFunction(
"objc_release",
@@ -1829,7 +1834,9 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) {
if (!RetainCallee) {
LLVMContext &C = M->getContext();
Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
RetainCallee =
M->getOrInsertFunction(
"objc_retain",
@@ -1858,7 +1865,9 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
if (!AutoreleaseCallee) {
LLVMContext &C = M->getContext();
Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
AutoreleaseCallee =
M->getOrInsertFunction(
"objc_autorelease",
@@ -1868,6 +1877,26 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
return AutoreleaseCallee;
}
+/// IsPotentialUse - Test whether the given value is possibly a
+/// reference-counted pointer, including tests which utilize AliasAnalysis.
+static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) {
+ // First make the rudimentary check.
+ if (!IsPotentialUse(Op))
+ return false;
+
+ // Objects in constant memory are not reference-counted.
+ if (AA.pointsToConstantMemory(Op))
+ return false;
+
+ // Pointers in constant memory are not pointing to reference-counted objects.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
+ if (AA.pointsToConstantMemory(LI->getPointerOperand()))
+ return false;
+
+ // Otherwise assume the worst.
+ return true;
+}
+
/// CanAlterRefCount - Test whether the given instruction can result in a
/// reference count modification (positive or negative) for the pointer's
/// object.
@@ -1894,7 +1923,7 @@ CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
I != E; ++I) {
const Value *Op = *I;
- if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
return true;
}
return false;
@@ -1919,14 +1948,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
// Comparing a pointer with null, or any other constant, isn't really a use,
// because we don't care what the pointer points to, or about the values
// of any other dynamic reference-counted pointers.
- if (!IsPotentialUse(ICI->getOperand(1)))
+ if (!IsPotentialUse(ICI->getOperand(1), *PA.getAA()))
return false;
} else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
// For calls, just check the arguments (and not the callee operand).
for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
OE = CS.arg_end(); OI != OE; ++OI) {
const Value *Op = *OI;
- if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
return true;
}
return false;
@@ -1936,14 +1965,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
// If we can't tell what the underlying object was, assume there is a
// dependence.
- return IsPotentialUse(Op) && PA.related(Op, Ptr);
+ return IsPotentialUse(Op, *PA.getAA()) && PA.related(Op, Ptr);
}
// Check each operand for a match.
for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
OI != OE; ++OI) {
const Value *Op = *OI;
- if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
return true;
}
return false;
@@ -2612,11 +2641,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
S.RRI.ReleaseMetadata = ReleaseMetadata;
- S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
+ S.RRI.KnownSafe = S.IsKnownIncremented();
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
S.RRI.Calls.insert(Inst);
- S.IncrementNestCount();
+ S.SetKnownPositiveRefCount();
break;
}
case IC_RetainBlock:
@@ -2631,7 +2660,6 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
PtrState &S = MyStates.getPtrBottomUpState(Arg);
S.SetKnownPositiveRefCount();
- S.DecrementNestCount();
switch (S.GetSeq()) {
case S_Stop:
@@ -2747,8 +2775,9 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
// Merge the states from each successor to compute the initial state
// for the current block.
- for (BBState::edge_iterator SI(MyStates.succ_begin()),
- SE(MyStates.succ_end()); SI != SE; ++SI) {
+ BBState::edge_iterator SI(MyStates.succ_begin()),
+ SE(MyStates.succ_end());
+ if (SI != SE) {
const BasicBlock *Succ = *SI;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
assert(I != BBStates.end());
@@ -2760,7 +2789,6 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
assert(I != BBStates.end());
MyStates.MergeSucc(I->second);
}
- break;
}
// Visit all the instructions, bottom-up.
@@ -2823,12 +2851,11 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
S.ResetSequenceProgress(S_Retain);
S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- // Don't check S.IsKnownIncremented() here because it's not sufficient.
- S.RRI.KnownSafe = S.IsKnownNested();
+ S.RRI.KnownSafe = S.IsKnownIncremented();
S.RRI.Calls.insert(Inst);
}
- S.IncrementNestCount();
+ S.SetKnownPositiveRefCount();
     // A retain can be a potential use; proceed to the generic checking
// code below.
@@ -2838,7 +2865,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
Arg = GetObjCArg(Inst);
PtrState &S = MyStates.getPtrTopDownState(Arg);
- S.DecrementNestCount();
+ S.ClearRefCount();
switch (S.GetSeq()) {
case S_Retain:
@@ -2935,8 +2962,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
// Merge the states from each predecessor to compute the initial state
// for the current block.
- for (BBState::edge_iterator PI(MyStates.pred_begin()),
- PE(MyStates.pred_end()); PI != PE; ++PI) {
+ BBState::edge_iterator PI(MyStates.pred_begin()),
+ PE(MyStates.pred_end());
+ if (PI != PE) {
const BasicBlock *Pred = *PI;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
assert(I != BBStates.end());
@@ -2948,7 +2976,6 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
assert(I != BBStates.end());
MyStates.MergePred(I->second);
}
- break;
}
// Visit all the instructions, top-down.
@@ -3532,19 +3559,19 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
}
/// OptimizeReturns - Look for this pattern:
-///
+/// \code
/// %call = call i8* @something(...)
/// %2 = call i8* @objc_retain(i8* %call)
/// %3 = call i8* @objc_autorelease(i8* %2)
/// ret i8* %3
-///
+/// \endcode
/// And delete the retain and autorelease.
///
/// Otherwise if it's just this:
-///
+/// \code
/// %3 = call i8* @objc_autorelease(i8* %2)
/// ret i8* %3
-///
+/// \endcode
/// convert the autorelease to autoreleaseRV.
void ObjCARCOpt::OptimizeReturns(Function &F) {
if (!F.getReturnType()->isPointerTy())
@@ -3814,8 +3841,9 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
Type *Params[] = { I8XX, I8X };
AttrListPtr Attributes = AttrListPtr()
- .addAttr(~0u, Attribute::NoUnwind)
- .addAttr(1, Attribute::NoCapture);
+ .addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind))
+ .addAttr(M->getContext(), 1, Attributes::get(C, Attributes::NoCapture));
StoreStrongCallee =
M->getOrInsertFunction(
@@ -3832,7 +3860,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *Params[] = { I8X };
FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
RetainAutoreleaseCallee =
M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
}
@@ -3845,7 +3875,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *Params[] = { I8X };
FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+ AttrListPtr Attributes =
+ AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::get(C, Attributes::NoUnwind));
RetainAutoreleaseRVCallee =
M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
Attributes);
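
All of the ObjCARC hunks above make the same mechanical change: the old index-based
AttrListPtr::addAttr call, where ~0u meant "apply to the function itself", is replaced by the
overload taking the LLVMContext, a named attribute index, and an Attributes object. A minimal
before/after sketch of the pattern, lifted directly from the hunks (C and M are the LLVMContext
and Module already in scope there; the two forms belong to different tree versions and are not
meant to compile together):

  // Old form: ~0u was the magic index meaning "attributes of the function".
  AttrListPtr OldAttrs = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);

  // New form: the index is named and the attribute is built from the context.
  AttrListPtr NewAttrs =
      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
                            Attributes::get(C, Attributes::NoUnwind));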
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 09687d8..7a40797 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -339,36 +339,6 @@ static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
}
}
-/// EvaluateRepeatedConstant - Compute C op C op ... op C where the constant C
-/// is repeated Weight times.
-static Constant *EvaluateRepeatedConstant(unsigned Opcode, Constant *C,
- APInt Weight) {
- // For addition the result can be efficiently computed as the product of the
- // constant and the weight.
- if (Opcode == Instruction::Add)
- return ConstantExpr::getMul(C, ConstantInt::get(C->getContext(), Weight));
-
- // The weight might be huge, so compute by repeated squaring to ensure that
- // compile time is proportional to the logarithm of the weight.
- Constant *Result = 0;
- Constant *Power = C; // Successively C, C op C, (C op C) op (C op C) etc.
- // Visit the bits in Weight.
- while (Weight != 0) {
- // If the current bit in Weight is non-zero do Result = Result op Power.
- if (Weight[0])
- Result = Result ? ConstantExpr::get(Opcode, Result, Power) : Power;
- // Move on to the next bit if any more are non-zero.
- Weight = Weight.lshr(1);
- if (Weight.isMinValue())
- break;
- // Square the power.
- Power = ConstantExpr::get(Opcode, Power, Power);
- }
-
- assert(Result && "Only positive weights supported!");
- return Result;
-}
-
typedef std::pair<Value*, APInt> RepeatedValue;
/// LinearizeExprTree - Given an associative binary expression, return the leaf
@@ -382,9 +352,7 @@ typedef std::pair<Value*, APInt> RepeatedValue;
/// op
/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
///
-/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct, and
-/// they are all non-constant except possibly for the last one, which if it is
-/// constant will have weight one (Ops[N].second === 1).
+/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
///
/// This routine may modify the function, in which case it returns 'true'. The
/// changes it makes may well be destructive, changing the value computed by 'I'
@@ -604,7 +572,6 @@ static bool LinearizeExprTree(BinaryOperator *I,
// The leaves, repeated according to their weights, represent the linearized
// form of the expression.
- Constant *Cst = 0; // Accumulate constants here.
for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
Value *V = LeafOrder[i];
LeafMap::iterator It = Leaves.find(V);
@@ -618,31 +585,14 @@ static bool LinearizeExprTree(BinaryOperator *I,
continue;
// Ensure the leaf is only output once.
It->second = 0;
- // Glob all constants together into Cst.
- if (Constant *C = dyn_cast<Constant>(V)) {
- C = EvaluateRepeatedConstant(Opcode, C, Weight);
- Cst = Cst ? ConstantExpr::get(Opcode, Cst, C) : C;
- continue;
- }
- // Add non-constant
Ops.push_back(std::make_pair(V, Weight));
}
- // Add any constants back into Ops, all globbed together and reduced to having
- // weight 1 for the convenience of users.
- Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
- if (Cst && Cst != Identity) {
- // If combining multiple constants resulted in the absorber then the entire
- // expression must evaluate to the absorber.
- if (Cst == Absorber)
- Ops.clear();
- Ops.push_back(std::make_pair(Cst, APInt(Bitwidth, 1)));
- }
-
// For nilpotent operations or addition there may be no operands, for example
// because the expression was "X xor X" or consisted of 2^Bitwidth additions:
// in both cases the weight reduces to 0 causing the value to be skipped.
if (Ops.empty()) {
+ Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
assert(Identity && "Associative operation without identity!");
Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1)));
}
@@ -656,8 +606,8 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
assert(Ops.size() > 1 && "Single values should be used directly!");
- // Since our optimizations never increase the number of operations, the new
- // expression can always be written by reusing the existing binary operators
+ // Since our optimizations should never increase the number of operations, the
+ // new expression can usually be written reusing the existing binary operators
// from the original expression tree, without creating any new instructions,
// though the rewritten expression may have a completely different topology.
// We take care to not change anything if the new expression will be the same
@@ -671,6 +621,20 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
unsigned Opcode = I->getOpcode();
BinaryOperator *Op = I;
+ /// NotRewritable - The operands being written will be the leaves of the new
+ /// expression and must not be used as inner nodes (via NodesToRewrite) by
+ /// mistake. Inner nodes are always reassociable, and usually leaves are not
+ /// (if they were they would have been incorporated into the expression and so
+ /// would not be leaves), so most of the time there is no danger of this. But
+ /// in rare cases a leaf may become reassociable if an optimization kills uses
+ /// of it, or it may momentarily become reassociable during rewriting (below)
+  /// due to it being removed as an operand of one of its uses. Ensure that misuse
+ /// of leaf nodes as inner nodes cannot occur by remembering all of the future
+ /// leaves and refusing to reuse any of them as inner nodes.
+ SmallPtrSet<Value*, 8> NotRewritable;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ NotRewritable.insert(Ops[i].Op);
+
// ExpressionChanged - Non-null if the rewritten expression differs from the
// original in some non-trivial way, requiring the clearing of optional flags.
// Flags are cleared from the operator in ExpressionChanged up to I inclusive.
@@ -703,12 +667,14 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// the old operands with the new ones.
DEBUG(dbgs() << "RA: " << *Op << '\n');
if (NewLHS != OldLHS) {
- if (BinaryOperator *BO = isReassociableOp(OldLHS, Opcode))
+ BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
+ if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(0, NewLHS);
}
if (NewRHS != OldRHS) {
- if (BinaryOperator *BO = isReassociableOp(OldRHS, Opcode))
+ BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
+ if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(1, NewRHS);
}
@@ -732,7 +698,8 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
Op->swapOperands();
} else {
// Overwrite with the new right-hand side.
- if (BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode))
+ BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
+ if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(1, NewRHS);
ExpressionChanged = Op;
@@ -745,7 +712,8 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// Now deal with the left-hand side. If this is already an operation node
// from the original expression then just rewrite the rest of the expression
// into it.
- if (BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode)) {
+ BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
+ if (BO && !NotRewritable.count(BO)) {
Op = BO;
continue;
}
@@ -1446,9 +1414,26 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Now that we have the linearized expression tree, try to optimize it.
// Start by folding any constants that we found.
- if (Ops.size() == 1) return Ops[0].Op;
-
+ Constant *Cst = 0;
unsigned Opcode = I->getOpcode();
+ while (!Ops.empty() && isa<Constant>(Ops.back().Op)) {
+ Constant *C = cast<Constant>(Ops.pop_back_val().Op);
+ Cst = Cst ? ConstantExpr::get(Opcode, C, Cst) : C;
+ }
+ // If there was nothing but constants then we are done.
+ if (Ops.empty())
+ return Cst;
+
+ // Put the combined constant back at the end of the operand list, except if
+ // there is no point. For example, an add of 0 gets dropped here, while a
+ // multiplication by zero turns the whole expression into zero.
+ if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
+ if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
+ return Cst;
+ Ops.push_back(ValueEntry(0, Cst));
+ }
+
+ if (Ops.size() == 1) return Ops[0].Op;
// Handle destructive annihilation due to identities between elements in the
// argument list here.
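
The Reassociate change above removes EvaluateRepeatedConstant and the constant globbing from
LinearizeExprTree; constants now stay in the operand list as ordinary leaves and are folded at
the start of OptimizeExpression, where trailing constants are popped, combined, and re-appended
only when the result is neither the identity nor the absorber. A toy, self-contained model of
that folding order, using plain ints for an add (the NumVars split and the identity test are
simplifications of the real isa<Constant> and getBinOpIdentity checks):

  #include <cstdio>
  #include <vector>

  // Toy model of the new constant handling in OptimizeExpression for an
  // integer add: the first NumVars entries stand in for non-constant leaves,
  // the trailing entries for constants left in the operand list.
  int main() {
    std::vector<int> Ops = {/*x*/ 7, /*y*/ 9, /*constants*/ 4, 5};
    const int NumVars = 2;        // simplification of the isa<Constant> test
    int Cst = 0;                  // 0 is the identity for add
    while ((int)Ops.size() > NumVars) {  // pop and fold trailing constants
      Cst += Ops.back();
      Ops.pop_back();
    }
    if (Cst != 0)                 // re-append only if it is not the identity
      Ops.push_back(Cst);
    for (int V : Ops)
      std::printf("%d ", V);      // prints: 7 9 9
    std::printf("\n");
  }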
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index 2c39aab..686520e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -26,7 +26,7 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
@@ -153,7 +153,7 @@ namespace {
/// Constant Propagation.
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
- const TargetData *TD;
+ const DataLayout *TD;
const TargetLibraryInfo *TLI;
SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
@@ -205,7 +205,7 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
typedef std::pair<BasicBlock*, BasicBlock*> Edge;
DenseSet<Edge> KnownFeasibleEdges;
public:
- SCCPSolver(const TargetData *td, const TargetLibraryInfo *tli)
+ SCCPSolver(const DataLayout *td, const TargetLibraryInfo *tli)
: TD(td), TLI(tli) {}
/// MarkBlockExecutable - This method can be used by clients to mark all of
@@ -1564,7 +1564,7 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
//
bool SCCP::runOnFunction(Function &F) {
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
- const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SCCPSolver Solver(TD, TLI);
@@ -1693,7 +1693,7 @@ static bool AddressIsTaken(const GlobalValue *GV) {
}
bool IPSCCP::runOnModule(Module &M) {
- const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SCCPSolver Solver(TD, TLI);
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
new file mode 100644
index 0000000..ccc2f7a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -0,0 +1,3697 @@
+//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This transformation implements the well known scalar replacement of
+/// aggregates transformation. It tries to identify promotable elements of an
+/// aggregate alloca, and promote them to registers. It will also try to
+/// convert uses of an element (or set of elements) of an alloca into a vector
+/// or bitfield-style integer scalar if appropriate.
+///
+/// It works to do this with minimal slicing of the alloca so that regions
+/// which are merely transferred in and out of external memory remain unchanged
+/// and are not decomposed to scalar code.
+///
+/// Because this also performs alloca promotion, it can be thought of as also
+/// serving the purpose of SSA formation. The algorithm iterates on the
+/// function until all opportunities for promotion have been realized.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sroa"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
+STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
+STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
+STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
+STATISTIC(NumDeleted, "Number of instructions deleted");
+STATISTIC(NumVectorized, "Number of vectorized aggregates");
+
+/// Hidden option to force the pass to not use DomTree and mem2reg, instead
+/// forming SSA values through the SSAUpdater infrastructure.
+static cl::opt<bool>
+ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
+
+namespace {
+/// \brief Alloca partitioning representation.
+///
+/// This class represents a partitioning of an alloca into slices, and
+/// information about the nature of uses of each slice of the alloca. The goal
+/// is that this information is sufficient to decide if and how to split the
+/// alloca apart and replace slices with scalars. It is also intended that this
+/// structure can capture the relevant information needed both to decide about
+/// and to enact these transformations.
+class AllocaPartitioning {
+public:
+ /// \brief A common base class for representing a half-open byte range.
+ struct ByteRange {
+ /// \brief The beginning offset of the range.
+ uint64_t BeginOffset;
+
+ /// \brief The ending offset, not included in the range.
+ uint64_t EndOffset;
+
+ ByteRange() : BeginOffset(), EndOffset() {}
+ ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
+ : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
+
+ /// \brief Support for ordering ranges.
+ ///
+ /// This provides an ordering over ranges such that start offsets are
+ /// always increasing, and within equal start offsets, the end offsets are
+ /// decreasing. Thus the spanning range comes first in a cluster with the
+ /// same start position.
+ bool operator<(const ByteRange &RHS) const {
+ if (BeginOffset < RHS.BeginOffset) return true;
+ if (BeginOffset > RHS.BeginOffset) return false;
+ if (EndOffset > RHS.EndOffset) return true;
+ return false;
+ }
+
+ /// \brief Support comparison with a single offset to allow binary searches.
+ friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
+ return LHS.BeginOffset < RHSOffset;
+ }
+
+ friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
+ const ByteRange &RHS) {
+ return LHSOffset < RHS.BeginOffset;
+ }
+
+ bool operator==(const ByteRange &RHS) const {
+ return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
+ }
+ bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
+ };
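
The comparator above is easy to misread, so here is a stand-alone check (a plain struct instead
of ByteRange, invented offsets) showing that among ranges sharing a begin offset the widest,
spanning range sorts first:

  #include <algorithm>
  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Stand-alone copy of the ByteRange ordering.
  struct Range { uint64_t Begin, End; };
  static bool lessRange(const Range &L, const Range &R) {
    if (L.Begin < R.Begin) return true;
    if (L.Begin > R.Begin) return false;
    return L.End > R.End;          // larger end first: the spanning range leads
  }

  int main() {
    std::vector<Range> Rs = {{0, 4}, {0, 16}, {4, 8}};
    std::sort(Rs.begin(), Rs.end(), lessRange);
    assert(Rs[0].Begin == 0 && Rs[0].End == 16); // the spanning [0, 16) is first
    return 0;
  }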
+
+ /// \brief A partition of an alloca.
+ ///
+ /// This structure represents a contiguous partition of the alloca. These are
+ /// formed by examining the uses of the alloca. During formation, they may
+ /// overlap but once an AllocaPartitioning is built, the Partitions within it
+ /// are all disjoint.
+ struct Partition : public ByteRange {
+ /// \brief Whether this partition is splittable into smaller partitions.
+ ///
+ /// We flag partitions as splittable when they are formed entirely due to
+ /// accesses by trivially splittable operations such as memset and memcpy.
+ bool IsSplittable;
+
+ /// \brief Test whether a partition has been marked as dead.
+ bool isDead() const {
+ if (BeginOffset == UINT64_MAX) {
+ assert(EndOffset == UINT64_MAX);
+ return true;
+ }
+ return false;
+ }
+
+ /// \brief Kill a partition.
+ /// This is accomplished by setting both its beginning and end offset to
+ /// the maximum possible value.
+ void kill() {
+ assert(!isDead() && "He's Dead, Jim!");
+ BeginOffset = EndOffset = UINT64_MAX;
+ }
+
+ Partition() : ByteRange(), IsSplittable() {}
+ Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
+ : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
+ };
+
+ /// \brief A particular use of a partition of the alloca.
+ ///
+ /// This structure is used to associate uses of a partition with it. They
+ /// mark the range of bytes which are referenced by a particular instruction,
+ /// and includes a handle to the user itself and the pointer value in use.
+ /// The bounds of these uses are determined by intersecting the bounds of the
+ /// memory use itself with a particular partition. As a consequence there is
+ /// intentionally overlap between various uses of the same partition.
+ struct PartitionUse : public ByteRange {
+ /// \brief The use in question. Provides access to both user and used value.
+ ///
+ /// Note that this may be null if the partition use is *dead*, that is, it
+ /// should be ignored.
+ Use *U;
+
+ PartitionUse() : ByteRange(), U() {}
+ PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U)
+ : ByteRange(BeginOffset, EndOffset), U(U) {}
+ };
+
+ /// \brief Construct a partitioning of a particular alloca.
+ ///
+ /// Construction does most of the work for partitioning the alloca. This
+ /// performs the necessary walks of users and builds a partitioning from it.
+ AllocaPartitioning(const DataLayout &TD, AllocaInst &AI);
+
+ /// \brief Test whether a pointer to the allocation escapes our analysis.
+ ///
+ /// If this is true, the partitioning is never fully built and should be
+ /// ignored.
+ bool isEscaped() const { return PointerEscapingInstr; }
+
+ /// \brief Support for iterating over the partitions.
+ /// @{
+ typedef SmallVectorImpl<Partition>::iterator iterator;
+ iterator begin() { return Partitions.begin(); }
+ iterator end() { return Partitions.end(); }
+
+ typedef SmallVectorImpl<Partition>::const_iterator const_iterator;
+ const_iterator begin() const { return Partitions.begin(); }
+ const_iterator end() const { return Partitions.end(); }
+ /// @}
+
+ /// \brief Support for iterating over and manipulating a particular
+ /// partition's uses.
+ ///
+ /// The iteration support provided for uses is more limited, but also
+ /// includes some manipulation routines to support rewriting the uses of
+ /// partitions during SROA.
+ /// @{
+ typedef SmallVectorImpl<PartitionUse>::iterator use_iterator;
+ use_iterator use_begin(unsigned Idx) { return Uses[Idx].begin(); }
+ use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); }
+ use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); }
+ use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); }
+
+ typedef SmallVectorImpl<PartitionUse>::const_iterator const_use_iterator;
+ const_use_iterator use_begin(unsigned Idx) const { return Uses[Idx].begin(); }
+ const_use_iterator use_begin(const_iterator I) const {
+ return Uses[I - begin()].begin();
+ }
+ const_use_iterator use_end(unsigned Idx) const { return Uses[Idx].end(); }
+ const_use_iterator use_end(const_iterator I) const {
+ return Uses[I - begin()].end();
+ }
+
+ unsigned use_size(unsigned Idx) const { return Uses[Idx].size(); }
+ unsigned use_size(const_iterator I) const { return Uses[I - begin()].size(); }
+ const PartitionUse &getUse(unsigned PIdx, unsigned UIdx) const {
+ return Uses[PIdx][UIdx];
+ }
+ const PartitionUse &getUse(const_iterator I, unsigned UIdx) const {
+ return Uses[I - begin()][UIdx];
+ }
+
+ void use_push_back(unsigned Idx, const PartitionUse &PU) {
+ Uses[Idx].push_back(PU);
+ }
+ void use_push_back(const_iterator I, const PartitionUse &PU) {
+ Uses[I - begin()].push_back(PU);
+ }
+ /// @}
+
+ /// \brief Allow iterating the dead users for this alloca.
+ ///
+ /// These are instructions which will never actually use the alloca as they
+ /// are outside the allocated range. They are safe to replace with undef and
+ /// delete.
+ /// @{
+ typedef SmallVectorImpl<Instruction *>::const_iterator dead_user_iterator;
+ dead_user_iterator dead_user_begin() const { return DeadUsers.begin(); }
+ dead_user_iterator dead_user_end() const { return DeadUsers.end(); }
+ /// @}
+
+ /// \brief Allow iterating the dead expressions referring to this alloca.
+ ///
+  /// These are operands which cannot actually be used to refer to the
+ /// alloca as they are outside its range and the user doesn't correct for
+ /// that. These mostly consist of PHI node inputs and the like which we just
+ /// need to replace with undef.
+ /// @{
+ typedef SmallVectorImpl<Use *>::const_iterator dead_op_iterator;
+ dead_op_iterator dead_op_begin() const { return DeadOperands.begin(); }
+ dead_op_iterator dead_op_end() const { return DeadOperands.end(); }
+ /// @}
+
+ /// \brief MemTransferInst auxiliary data.
+ /// This struct provides some auxiliary data about memory transfer
+ /// intrinsics such as memcpy and memmove. These intrinsics can use two
+ /// different ranges within the same alloca, and provide other challenges to
+ /// correctly represent. We stash extra data to help us untangle this
+ /// after the partitioning is complete.
+ struct MemTransferOffsets {
+ /// The destination begin and end offsets when the destination is within
+ /// this alloca. If the end offset is zero the destination is not within
+ /// this alloca.
+ uint64_t DestBegin, DestEnd;
+
+ /// The source begin and end offsets when the source is within this alloca.
+ /// If the end offset is zero, the source is not within this alloca.
+ uint64_t SourceBegin, SourceEnd;
+
+    /// Flag for whether this transfer is splittable.
+ bool IsSplittable;
+ };
+ MemTransferOffsets getMemTransferOffsets(MemTransferInst &II) const {
+ return MemTransferInstData.lookup(&II);
+ }
+
+ /// \brief Map from a PHI or select operand back to a partition.
+ ///
+ /// When manipulating PHI nodes or selects, they can use more than one
+ /// partition of an alloca. We store a special mapping to allow finding the
+ /// partition referenced by each of these operands, if any.
+ iterator findPartitionForPHIOrSelectOperand(Use *U) {
+ SmallDenseMap<Use *, std::pair<unsigned, unsigned> >::const_iterator MapIt
+ = PHIOrSelectOpMap.find(U);
+ if (MapIt == PHIOrSelectOpMap.end())
+ return end();
+
+ return begin() + MapIt->second.first;
+ }
+
+ /// \brief Map from a PHI or select operand back to the specific use of
+ /// a partition.
+ ///
+ /// Similar to mapping these operands back to the partitions, this maps
+ /// directly to the use structure of that partition.
+ use_iterator findPartitionUseForPHIOrSelectOperand(Use *U) {
+ SmallDenseMap<Use *, std::pair<unsigned, unsigned> >::const_iterator MapIt
+ = PHIOrSelectOpMap.find(U);
+ assert(MapIt != PHIOrSelectOpMap.end());
+ return Uses[MapIt->second.first].begin() + MapIt->second.second;
+ }
+
+ /// \brief Compute a common type among the uses of a particular partition.
+ ///
+  /// This routine walks all of the uses of a particular partition and tries
+ /// to find a common type between them. Untyped operations such as memset and
+ /// memcpy are ignored.
+ Type *getCommonType(iterator I) const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
+ void printUsers(raw_ostream &OS, const_iterator I,
+ StringRef Indent = " ") const;
+ void print(raw_ostream &OS) const;
+ void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump(const_iterator I) const;
+ void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump() const;
+#endif
+
+private:
+ template <typename DerivedT, typename RetT = void> class BuilderBase;
+ class PartitionBuilder;
+ friend class AllocaPartitioning::PartitionBuilder;
+ class UseBuilder;
+ friend class AllocaPartitioning::UseBuilder;
+
+#ifndef NDEBUG
+ /// \brief Handle to alloca instruction to simplify method interfaces.
+ AllocaInst &AI;
+#endif
+
+ /// \brief The instruction responsible for this alloca having no partitioning.
+ ///
+ /// When an instruction (potentially) escapes the pointer to the alloca, we
+ /// store a pointer to that here and abort trying to partition the alloca.
+ /// This will be null if the alloca is partitioned successfully.
+ Instruction *PointerEscapingInstr;
+
+ /// \brief The partitions of the alloca.
+ ///
+ /// We store a vector of the partitions over the alloca here. This vector is
+ /// sorted by increasing begin offset, and then by decreasing end offset. See
+ /// the Partition inner class for more details. Initially (during
+ /// construction) there are overlaps, but we form a disjoint sequence of
+ /// partitions while finishing construction and a fully constructed object is
+ /// expected to always have this as a disjoint space.
+ SmallVector<Partition, 8> Partitions;
+
+ /// \brief The uses of the partitions.
+ ///
+ /// This is essentially a mapping from each partition to a list of uses of
+ /// that partition. The mapping is done with a Uses vector that has the exact
+ /// same number of entries as the partition vector. Each entry is itself
+ /// a vector of the uses.
+ SmallVector<SmallVector<PartitionUse, 2>, 8> Uses;
+
+ /// \brief Instructions which will become dead if we rewrite the alloca.
+ ///
+ /// Note that these are not separated by partition. This is because we expect
+ /// a partitioned alloca to be completely rewritten or not rewritten at all.
+ /// If rewritten, all these instructions can simply be removed and replaced
+ /// with undef as they come from outside of the allocated space.
+ SmallVector<Instruction *, 8> DeadUsers;
+
+ /// \brief Operands which will become dead if we rewrite the alloca.
+ ///
+ /// These are operands that in their particular use can be replaced with
+ /// undef when we rewrite the alloca. These show up in out-of-bounds inputs
+ /// to PHI nodes and the like. They aren't entirely dead (there might be
+  /// a GEP back into the bounds using it elsewhere), nor is the PHI, but we
+ /// want to swap this particular input for undef to simplify the use lists of
+ /// the alloca.
+ SmallVector<Use *, 8> DeadOperands;
+
+ /// \brief The underlying storage for auxiliary memcpy and memset info.
+ SmallDenseMap<MemTransferInst *, MemTransferOffsets, 4> MemTransferInstData;
+
+  /// \brief A side data structure used when building up the partitions and uses.
+ ///
+ /// This mapping is only really used during the initial building of the
+ /// partitioning so that we can retain information about PHI and select nodes
+ /// processed.
+ SmallDenseMap<Instruction *, std::pair<uint64_t, bool> > PHIOrSelectSizes;
+
+ /// \brief Auxiliary information for particular PHI or select operands.
+ SmallDenseMap<Use *, std::pair<unsigned, unsigned>, 4> PHIOrSelectOpMap;
+
+ /// \brief A utility routine called from the constructor.
+ ///
+ /// This does what it says on the tin. It is the key of the alloca partition
+ /// splitting and merging. After it is called we have the desired disjoint
+ /// collection of partitions.
+ void splitAndMergePartitions();
+};
+}
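
A hypothetical usage sketch of the interface declared above, to show how the partition and use
iterators fit together; walkPartitions is an invented helper and TD/AI are assumed to be supplied
by the caller, so this is not code from the patch:

  // Fragment assuming the includes and declarations of this file.
  static void walkPartitions(const DataLayout &TD, AllocaInst &AI) {
    AllocaPartitioning P(TD, AI);
    if (P.isEscaped())
      return;   // a pointer to the alloca escaped; no partitioning was built
    for (AllocaPartitioning::iterator I = P.begin(), E = P.end(); I != E; ++I)
      for (AllocaPartitioning::use_iterator UI = P.use_begin(I),
                                            UE = P.use_end(I);
           UI != UE; ++UI)
        if (UI->U)                  // a null Use marks a dead partition use
          (void)UI->U->getUser();   // the instruction referencing this range
  }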
+
+template <typename DerivedT, typename RetT>
+class AllocaPartitioning::BuilderBase
+ : public InstVisitor<DerivedT, RetT> {
+public:
+ BuilderBase(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
+ : TD(TD),
+ AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())),
+ P(P) {
+ enqueueUsers(AI, 0);
+ }
+
+protected:
+ const DataLayout &TD;
+ const uint64_t AllocSize;
+ AllocaPartitioning &P;
+
+ SmallPtrSet<Use *, 8> VisitedUses;
+
+ struct OffsetUse {
+ Use *U;
+ int64_t Offset;
+ };
+ SmallVector<OffsetUse, 8> Queue;
+
+ // The active offset and use while visiting.
+ Use *U;
+ int64_t Offset;
+
+ void enqueueUsers(Instruction &I, int64_t UserOffset) {
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI) {
+ if (VisitedUses.insert(&UI.getUse())) {
+ OffsetUse OU = { &UI.getUse(), UserOffset };
+ Queue.push_back(OU);
+ }
+ }
+ }
+
+ bool computeConstantGEPOffset(GetElementPtrInst &GEPI, int64_t &GEPOffset) {
+ GEPOffset = Offset;
+ for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI);
+ GTI != GTE; ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+ if (!OpC)
+ return false;
+ if (OpC->isZero())
+ continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = TD.getStructLayout(STy);
+ uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
+ // Check that we can continue to model this GEP in a signed 64-bit offset.
+ if (ElementOffset > INT64_MAX ||
+ (GEPOffset >= 0 &&
+ ((uint64_t)GEPOffset + ElementOffset) > INT64_MAX)) {
+ DEBUG(dbgs() << "WARNING: Encountered a cumulative offset exceeding "
+ << "what can be represented in an int64_t!\n"
+ << " alloca: " << P.AI << "\n");
+ return false;
+ }
+ if (GEPOffset < 0)
+ GEPOffset = ElementOffset + (uint64_t)-GEPOffset;
+ else
+ GEPOffset += ElementOffset;
+ continue;
+ }
+
+ APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits());
+ Index *= APInt(Index.getBitWidth(),
+ TD.getTypeAllocSize(GTI.getIndexedType()));
+ Index += APInt(Index.getBitWidth(), (uint64_t)GEPOffset,
+ /*isSigned*/true);
+ // Check if the result can be stored in our int64_t offset.
+ if (!Index.isSignedIntN(sizeof(GEPOffset) * 8)) {
+ DEBUG(dbgs() << "WARNING: Encountered a cumulative offset exceeding "
+ << "what can be represented in an int64_t!\n"
+ << " alloca: " << P.AI << "\n");
+ return false;
+ }
+
+ GEPOffset = Index.getSExtValue();
+ }
+ return true;
+ }
+
+ Value *foldSelectInst(SelectInst &SI) {
+ // If the condition being selected on is a constant or the same value is
+ // being selected between, fold the select. Yes this does (rarely) happen
+ // early on.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
+ return SI.getOperand(1+CI->isZero());
+ if (SI.getOperand(1) == SI.getOperand(2)) {
+ assert(*U == SI.getOperand(1));
+ return SI.getOperand(1);
+ }
+ return 0;
+ }
+};
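
computeConstantGEPOffset above folds a GEP into a single signed byte offset: struct indices add
the field offset from the StructLayout, array indices add the index times the element's alloc
size, and the walk gives up if the running total can no longer be represented in an int64_t. A
stand-alone arithmetic sketch with made-up sizes (no LLVM types involved, both terms assumed
non-negative for simplicity):

  #include <cstdint>
  #include <cstdio>

  int main() {
    int64_t Offset = 0;             // running byte offset of the GEP
    const uint64_t FieldOffset = 8; // offset of the selected struct field
    const int64_t Index = 3;        // constant array index
    const uint64_t ElemSize = 16;   // alloc size of the indexed element type

    Offset += (int64_t)FieldOffset;           // struct index: add field offset
    const int64_t Scaled = Index * (int64_t)ElemSize;
    if (Offset > INT64_MAX - Scaled) {        // would overflow int64_t: give up
      std::puts("offset not representable in int64_t");
      return 1;
    }
    Offset += Scaled;                         // array index: add index * size
    std::printf("constant GEP offset = %lld bytes\n", (long long)Offset); // 56
    return 0;
  }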
+
+/// \brief Builder for the alloca partitioning.
+///
+/// This class builds an alloca partitioning by recursively visiting the uses
+/// of an alloca and splitting the partitions for each load and store at each
+/// offset.
+class AllocaPartitioning::PartitionBuilder
+ : public BuilderBase<PartitionBuilder, bool> {
+ friend class InstVisitor<PartitionBuilder, bool>;
+
+ SmallDenseMap<Instruction *, unsigned> MemTransferPartitionMap;
+
+public:
+ PartitionBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
+ : BuilderBase<PartitionBuilder, bool>(TD, AI, P) {}
+
+ /// \brief Run the builder over the allocation.
+ bool operator()() {
+ // Note that we have to re-evaluate size on each trip through the loop as
+ // the queue grows at the tail.
+ for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) {
+ U = Queue[Idx].U;
+ Offset = Queue[Idx].Offset;
+ if (!visit(cast<Instruction>(U->getUser())))
+ return false;
+ }
+ return true;
+ }
+
+private:
+ bool markAsEscaping(Instruction &I) {
+ P.PointerEscapingInstr = &I;
+ return false;
+ }
+
+ void insertUse(Instruction &I, int64_t Offset, uint64_t Size,
+ bool IsSplittable = false) {
+ // Completely skip uses which have a zero size or don't overlap the
+ // allocation.
+ if (Size == 0 ||
+ (Offset >= 0 && (uint64_t)Offset >= AllocSize) ||
+ (Offset < 0 && (uint64_t)-Offset >= Size)) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
+ << " which starts past the end of the " << AllocSize
+ << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << I << "\n");
+ return;
+ }
+
+ // Clamp the start to the beginning of the allocation.
+ if (Offset < 0) {
+ DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
+ << " to start at the beginning of the alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << I << "\n");
+ Size -= (uint64_t)-Offset;
+ Offset = 0;
+ }
+
+ uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size;
+
+ // Clamp the end offset to the end of the allocation. Note that this is
+ // formulated to handle even the case where "BeginOffset + Size" overflows.
+ // NOTE! This may appear superficially to be something we could ignore
+ // entirely, but that is not so! There may be PHI-node uses where some
+ // instructions are dead but not others. We can't completely ignore the
+ // PHI node, and so have to record at least the information here.
+ assert(AllocSize >= BeginOffset); // Established above.
+ if (Size > AllocSize - BeginOffset) {
+ DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
+ << " to remain within the " << AllocSize << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << I << "\n");
+ EndOffset = AllocSize;
+ }
+
+ Partition New(BeginOffset, EndOffset, IsSplittable);
+ P.Partitions.push_back(New);
+ }
+
+ bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset,
+ bool IsVolatile) {
+ uint64_t Size = TD.getTypeStoreSize(Ty);
+
+ // If this memory access can be shown to *statically* extend outside the
+    // bounds of the allocation, its behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse. We also try to handle cases which might run the
+ // risk of overflow.
+ // FIXME: We should instead consider the pointer to have escaped if this
+ // function is being instrumented for addressing bugs or race conditions.
+ if (Offset < 0 || (uint64_t)Offset >= AllocSize ||
+ Size > (AllocSize - (uint64_t)Offset)) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
+ << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
+ << " which extends past the end of the " << AllocSize
+ << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << I << "\n");
+ return true;
+ }
+
+ // We allow splitting of loads and stores where the type is an integer type
+ // and which cover the entire alloca. Such integer loads and stores
+ // often require decomposition into fine grained loads and stores.
+ bool IsSplittable = false;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
+
+ insertUse(I, Offset, Size, IsSplittable);
+ return true;
+ }
+
+ bool visitBitCastInst(BitCastInst &BC) {
+ enqueueUsers(BC, Offset);
+ return true;
+ }
+
+ bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ int64_t GEPOffset;
+ if (!computeConstantGEPOffset(GEPI, GEPOffset))
+ return markAsEscaping(GEPI);
+
+ enqueueUsers(GEPI, GEPOffset);
+ return true;
+ }
+
+ bool visitLoadInst(LoadInst &LI) {
+ assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
+ "All simple FCA loads should have been pre-split");
+ return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
+ }
+
+ bool visitStoreInst(StoreInst &SI) {
+ Value *ValOp = SI.getValueOperand();
+ if (ValOp == *U)
+ return markAsEscaping(SI);
+
+ assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
+ "All simple FCA stores should have been pre-split");
+ return handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
+ }
+
+
+ bool visitMemSetInst(MemSetInst &II) {
+ assert(II.getRawDest() == *U && "Pointer use is not the destination?");
+ ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
+ uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
+ insertUse(II, Offset, Size, Length);
+ return true;
+ }
+
+ bool visitMemTransferInst(MemTransferInst &II) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
+ uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
+ if (!Size)
+ // Zero-length mem transfer intrinsics can be ignored entirely.
+ return true;
+
+ MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
+
+ // Only intrinsics with a constant length can be split.
+ Offsets.IsSplittable = Length;
+
+ if (*U == II.getRawDest()) {
+ Offsets.DestBegin = Offset;
+ Offsets.DestEnd = Offset + Size;
+ }
+ if (*U == II.getRawSource()) {
+ Offsets.SourceBegin = Offset;
+ Offsets.SourceEnd = Offset + Size;
+ }
+
+ // If we have set up end offsets for both the source and the destination,
+ // we have found both sides of this transfer pointing at the same alloca.
+ bool SeenBothEnds = Offsets.SourceEnd && Offsets.DestEnd;
+ if (SeenBothEnds && II.getRawDest() != II.getRawSource()) {
+ unsigned PrevIdx = MemTransferPartitionMap[&II];
+
+ // Check if the begin offsets match and this is a non-volatile transfer.
+ // In that case, we can completely elide the transfer.
+ if (!II.isVolatile() && Offsets.SourceBegin == Offsets.DestBegin) {
+ P.Partitions[PrevIdx].kill();
+ return true;
+ }
+
+ // Otherwise we have an offset transfer within the same alloca. We can't
+ // split those.
+ P.Partitions[PrevIdx].IsSplittable = Offsets.IsSplittable = false;
+ } else if (SeenBothEnds) {
+ // Handle the case where this exact use provides both ends of the
+ // operation.
+ assert(II.getRawDest() == II.getRawSource());
+
+ // For non-volatile transfers this is a no-op.
+ if (!II.isVolatile())
+ return true;
+
+ // Otherwise just suppress splitting.
+ Offsets.IsSplittable = false;
+ }
+
+
+ // Insert the use now that we've fixed up the splittable nature.
+ insertUse(II, Offset, Size, Offsets.IsSplittable);
+
+    // Set up the mapping from intrinsic to partition if we've not seen both
+ // ends of this transfer.
+ if (!SeenBothEnds) {
+ unsigned NewIdx = P.Partitions.size() - 1;
+ bool Inserted
+ = MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx)).second;
+ assert(Inserted &&
+ "Already have intrinsic in map but haven't seen both ends");
+ (void)Inserted;
+ }
+
+ return true;
+ }
+
+ // Disable SRoA for any intrinsics except for lifetime invariants.
+  // FIXME: What about debug intrinsics? This matches old behavior, but
+ // doesn't make sense.
+ bool visitIntrinsicInst(IntrinsicInst &II) {
+ if (II.getIntrinsicID() == Intrinsic::lifetime_start ||
+ II.getIntrinsicID() == Intrinsic::lifetime_end) {
+ ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
+ uint64_t Size = std::min(AllocSize - Offset, Length->getLimitedValue());
+ insertUse(II, Offset, Size, true);
+ return true;
+ }
+
+ return markAsEscaping(II);
+ }
+
+ Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
+ // We consider any PHI or select that results in a direct load or store of
+ // the same offset to be a viable use for partitioning purposes. These uses
+ // are considered unsplittable and the size is the maximum loaded or stored
+ // size.
+ SmallPtrSet<Instruction *, 4> Visited;
+ SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
+ Visited.insert(Root);
+ Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
+ // If there are no loads or stores, the access is dead. We mark that as
+ // a size zero access.
+ Size = 0;
+ do {
+ Instruction *I, *UsedI;
+ llvm::tie(UsedI, I) = Uses.pop_back_val();
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ Size = std::max(Size, TD.getTypeStoreSize(LI->getType()));
+ continue;
+ }
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ Value *Op = SI->getOperand(0);
+ if (Op == UsedI)
+ return SI;
+ Size = std::max(Size, TD.getTypeStoreSize(Op->getType()));
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ if (!GEP->hasAllZeroIndices())
+ return GEP;
+ } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
+ !isa<SelectInst>(I)) {
+ return I;
+ }
+
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
+ ++UI)
+ if (Visited.insert(cast<Instruction>(*UI)))
+ Uses.push_back(std::make_pair(I, cast<Instruction>(*UI)));
+ } while (!Uses.empty());
+
+ return 0;
+ }
+
+ bool visitPHINode(PHINode &PN) {
+ // See if we already have computed info on this node.
+ std::pair<uint64_t, bool> &PHIInfo = P.PHIOrSelectSizes[&PN];
+ if (PHIInfo.first) {
+ PHIInfo.second = true;
+ insertUse(PN, Offset, PHIInfo.first);
+ return true;
+ }
+
+ // Check for an unsafe use of the PHI node.
+ if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first))
+ return markAsEscaping(*EscapingI);
+
+ insertUse(PN, Offset, PHIInfo.first);
+ return true;
+ }
+
+ bool visitSelectInst(SelectInst &SI) {
+ if (Value *Result = foldSelectInst(SI)) {
+ if (Result == *U)
+ // If the result of the constant fold will be the pointer, recurse
+ // through the select as if we had RAUW'ed it.
+ enqueueUsers(SI, Offset);
+
+ return true;
+ }
+
+ // See if we already have computed info on this node.
+ std::pair<uint64_t, bool> &SelectInfo = P.PHIOrSelectSizes[&SI];
+ if (SelectInfo.first) {
+ SelectInfo.second = true;
+ insertUse(SI, Offset, SelectInfo.first);
+ return true;
+ }
+
+    // Check for an unsafe use of the select.
+ if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first))
+ return markAsEscaping(*EscapingI);
+
+ insertUse(SI, Offset, SelectInfo.first);
+ return true;
+ }
+
+ /// \brief Disable SROA entirely if there are unhandled users of the alloca.
+ bool visitInstruction(Instruction &I) { return markAsEscaping(I); }
+};
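
PartitionBuilder::insertUse above skips empty or fully out-of-range uses and clamps the rest to
the alloca's [0, AllocSize) span. A stand-alone sketch of just that clamping, with invented
numbers:

  #include <cstdint>
  #include <cstdio>

  // Returns false when the use should be skipped entirely.
  static bool clampUse(int64_t Offset, uint64_t Size, uint64_t AllocSize,
                       uint64_t &Begin, uint64_t &End) {
    if (Size == 0 ||
        (Offset >= 0 && (uint64_t)Offset >= AllocSize) || // starts past the end
        (Offset < 0 && (uint64_t)-Offset >= Size))        // ends before byte 0
      return false;
    if (Offset < 0) {              // clamp the start to the alloca's beginning
      Size -= (uint64_t)-Offset;
      Offset = 0;
    }
    Begin = (uint64_t)Offset;
    End = Size > AllocSize - Begin ? AllocSize : Begin + Size; // clamp the end
    return true;
  }

  int main() {
    uint64_t B, E;
    if (clampUse(/*Offset=*/-4, /*Size=*/12, /*AllocSize=*/16, B, E))
      std::printf("[%llu, %llu)\n", (unsigned long long)B,
                  (unsigned long long)E);                  // prints [0, 8)
    return 0;
  }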
+
+
+/// \brief Use adder for the alloca partitioning.
+///
+/// This class adds the uses of an alloca to all of the partitions which they
+/// use. For splittable partitions, this can end up doing essentially a linear
+/// walk of the partitions, but the number of steps remains bounded by the
+/// total result instruction size:
+///   - The number of partitions is a result of the number of unsplittable
+/// instructions using the alloca.
+/// - The number of users of each partition is at worst the total number of
+/// splittable instructions using the alloca.
+/// Thus we will produce N * M instructions in the end, where N is the number
+/// of unsplittable uses and M is the number of splittable ones. This visitor does
+/// the exact same number of updates to the partitioning.
+///
+/// In the more common case, this visitor will leverage the fact that the
+/// partition space is pre-sorted, and do a logarithmic search for the
+/// partition needed, making the total visit a classical ((N + M) * log(N))
+/// complexity operation.
+class AllocaPartitioning::UseBuilder : public BuilderBase<UseBuilder> {
+ friend class InstVisitor<UseBuilder>;
+
+ /// \brief Set to de-duplicate dead instructions found in the use walk.
+ SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
+
+public:
+ UseBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
+ : BuilderBase<UseBuilder>(TD, AI, P) {}
+
+ /// \brief Run the builder over the allocation.
+ void operator()() {
+ // Note that we have to re-evaluate size on each trip through the loop as
+ // the queue grows at the tail.
+ for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) {
+ U = Queue[Idx].U;
+ Offset = Queue[Idx].Offset;
+ this->visit(cast<Instruction>(U->getUser()));
+ }
+ }
+
+private:
+ void markAsDead(Instruction &I) {
+ if (VisitedDeadInsts.insert(&I))
+ P.DeadUsers.push_back(&I);
+ }
+
+ void insertUse(Instruction &User, int64_t Offset, uint64_t Size) {
+ // If the use has a zero size or extends outside of the allocation, record
+ // it as a dead use for elimination later.
+ if (Size == 0 || (uint64_t)Offset >= AllocSize ||
+ (Offset < 0 && (uint64_t)-Offset >= Size))
+ return markAsDead(User);
+
+ // Clamp the start to the beginning of the allocation.
+ if (Offset < 0) {
+ Size -= (uint64_t)-Offset;
+ Offset = 0;
+ }
+
+ uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size;
+
+ // Clamp the end offset to the end of the allocation. Note that this is
+ // formulated to handle even the case where "BeginOffset + Size" overflows.
+ assert(AllocSize >= BeginOffset); // Established above.
+ if (Size > AllocSize - BeginOffset)
+ EndOffset = AllocSize;
+
+ // NB: This only works if we have zero overlapping partitions.
+ iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset);
+ if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset)
+ B = llvm::prior(B);
+ for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset;
+ ++I) {
+ PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
+ std::min(I->EndOffset, EndOffset), U);
+ P.use_push_back(I, NewPU);
+ if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
+ P.PHIOrSelectOpMap[U]
+ = std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1);
+ }
+ }
+
+ void handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset) {
+ uint64_t Size = TD.getTypeStoreSize(Ty);
+
+ // If this memory access can be shown to *statically* extend outside the
+    // bounds of the allocation, its behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse.
+ if (Offset < 0 || (uint64_t)Offset >= AllocSize ||
+ Size > (AllocSize - (uint64_t)Offset))
+ return markAsDead(I);
+
+ insertUse(I, Offset, Size);
+ }
+
+ void visitBitCastInst(BitCastInst &BC) {
+ if (BC.use_empty())
+ return markAsDead(BC);
+
+ enqueueUsers(BC, Offset);
+ }
+
+ void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ if (GEPI.use_empty())
+ return markAsDead(GEPI);
+
+ int64_t GEPOffset;
+ if (!computeConstantGEPOffset(GEPI, GEPOffset))
+ llvm_unreachable("Unable to compute constant offset for use");
+
+ enqueueUsers(GEPI, GEPOffset);
+ }
+
+ void visitLoadInst(LoadInst &LI) {
+ handleLoadOrStore(LI.getType(), LI, Offset);
+ }
+
+ void visitStoreInst(StoreInst &SI) {
+ handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset);
+ }
+
+ void visitMemSetInst(MemSetInst &II) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
+ uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
+ insertUse(II, Offset, Size);
+ }
+
+ void visitMemTransferInst(MemTransferInst &II) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
+ uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
+ if (!Size)
+ return markAsDead(II);
+
+ MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
+ if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd &&
+ Offsets.DestBegin == Offsets.SourceBegin)
+ return markAsDead(II); // Skip identity transfers without side-effects.
+
+ insertUse(II, Offset, Size);
+ }
+
+ void visitIntrinsicInst(IntrinsicInst &II) {
+ assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
+ II.getIntrinsicID() == Intrinsic::lifetime_end);
+
+ ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
+ insertUse(II, Offset,
+ std::min(AllocSize - Offset, Length->getLimitedValue()));
+ }
+
+ void insertPHIOrSelect(Instruction &User, uint64_t Offset) {
+ uint64_t Size = P.PHIOrSelectSizes.lookup(&User).first;
+
+ // For PHI and select operands outside the alloca, we can't nuke the entire
+ // phi or select -- the other side might still be relevant, so we special
+ // case them here and use a separate structure to track the operands
+ // themselves which should be replaced with undef.
+ if (Offset >= AllocSize) {
+ P.DeadOperands.push_back(U);
+ return;
+ }
+
+ insertUse(User, Offset, Size);
+ }
+ void visitPHINode(PHINode &PN) {
+ if (PN.use_empty())
+ return markAsDead(PN);
+
+ insertPHIOrSelect(PN, Offset);
+ }
+ void visitSelectInst(SelectInst &SI) {
+ if (SI.use_empty())
+ return markAsDead(SI);
+
+ if (Value *Result = foldSelectInst(SI)) {
+ if (Result == *U)
+ // If the result of the constant fold will be the pointer, recurse
+ // through the select as if we had RAUW'ed it.
+ enqueueUsers(SI, Offset);
+ else
+ // Otherwise the operand to the select is dead, and we can replace it
+ // with undef.
+ P.DeadOperands.push_back(U);
+
+ return;
+ }
+
+ insertPHIOrSelect(SI, Offset);
+ }
+
+ /// \brief Unreachable, we've already visited the alloca once.
+ void visitInstruction(Instruction &I) {
+ llvm_unreachable("Unhandled instruction in use builder.");
+ }
+};
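
UseBuilder::insertUse above relies on the partitions already being disjoint and sorted: it uses
std::lower_bound to find the first partition starting at or after the use, steps back once if the
previous partition still covers the use's begin offset, and then walks forward while partitions
start before the use's end. A stand-alone sketch of that search with invented ranges:

  #include <algorithm>
  #include <cstdint>
  #include <cstdio>
  #include <iterator>
  #include <vector>

  struct Part { uint64_t Begin, End; };

  int main() {
    std::vector<Part> Parts = {{0, 4}, {4, 8}, {8, 16}};  // disjoint and sorted
    const uint64_t Begin = 2, End = 10;                   // the use's byte range

    // First partition whose begin offset is not less than Begin ...
    auto I = std::lower_bound(Parts.begin(), Parts.end(), Begin,
                              [](const Part &P, uint64_t Off) {
                                return P.Begin < Off;
                              });
    // ... stepping back once if the previous partition still covers Begin.
    if (I != Parts.begin() && std::prev(I)->End > Begin)
      --I;
    for (; I != Parts.end() && I->Begin < End; ++I)
      std::printf("overlaps [%llu, %llu)\n", (unsigned long long)I->Begin,
                  (unsigned long long)I->End);  // all three partitions print
    return 0;
  }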
+
+void AllocaPartitioning::splitAndMergePartitions() {
+ size_t NumDeadPartitions = 0;
+
+ // Track the range of splittable partitions that we pass when accumulating
+ // overlapping unsplittable partitions.
+ uint64_t SplitEndOffset = 0ull;
+
+ Partition New(0ull, 0ull, false);
+
+ for (unsigned i = 0, j = i, e = Partitions.size(); i != e; i = j) {
+ ++j;
+
+ if (!Partitions[i].IsSplittable || New.BeginOffset == New.EndOffset) {
+ assert(New.BeginOffset == New.EndOffset);
+ New = Partitions[i];
+ } else {
+ assert(New.IsSplittable);
+ New.EndOffset = std::max(New.EndOffset, Partitions[i].EndOffset);
+ }
+ assert(New.BeginOffset != New.EndOffset);
+
+ // Scan the overlapping partitions.
+ while (j != e && New.EndOffset > Partitions[j].BeginOffset) {
+ // If the new partition we are forming is splittable, stop at the first
+ // unsplittable partition.
+ if (New.IsSplittable && !Partitions[j].IsSplittable)
+ break;
+
+ // Grow the new partition to include any equally splittable range. 'j' is
+ // always equally splittable when New is splittable, but when New is not
+      // splittable, we may subsume some (or part of some) splittable partition
+ // without growing the new one.
+ if (New.IsSplittable == Partitions[j].IsSplittable) {
+ New.EndOffset = std::max(New.EndOffset, Partitions[j].EndOffset);
+ } else {
+ assert(!New.IsSplittable);
+ assert(Partitions[j].IsSplittable);
+ SplitEndOffset = std::max(SplitEndOffset, Partitions[j].EndOffset);
+ }
+
+ Partitions[j].kill();
+ ++NumDeadPartitions;
+ ++j;
+ }
+
+ // If the new partition is splittable, chop off the end as soon as the
+ // unsplittable subsequent partition starts and ensure we eventually cover
+ // the splittable area.
+ if (j != e && New.IsSplittable) {
+ SplitEndOffset = std::max(SplitEndOffset, New.EndOffset);
+ New.EndOffset = std::min(New.EndOffset, Partitions[j].BeginOffset);
+ }
+
+ // Add the new partition if it differs from the original one and is
+ // non-empty. We can end up with an empty partition here if it was
+ // splittable but there is an unsplittable one that starts at the same
+ // offset.
+ if (New != Partitions[i]) {
+ if (New.BeginOffset != New.EndOffset)
+ Partitions.push_back(New);
+ // Mark the old one for removal.
+ Partitions[i].kill();
+ ++NumDeadPartitions;
+ }
+
+ New.BeginOffset = New.EndOffset;
+ if (!New.IsSplittable) {
+ New.EndOffset = std::max(New.EndOffset, SplitEndOffset);
+ if (j != e && !Partitions[j].IsSplittable)
+ New.EndOffset = std::min(New.EndOffset, Partitions[j].BeginOffset);
+ New.IsSplittable = true;
+ // If there is a trailing splittable partition which won't be fused into
+ // the next splittable partition, go ahead and add it onto the partitions
+ // list.
+ if (New.BeginOffset < New.EndOffset &&
+ (j == e || !Partitions[j].IsSplittable ||
+ New.EndOffset < Partitions[j].BeginOffset)) {
+ Partitions.push_back(New);
+ New.BeginOffset = New.EndOffset = 0ull;
+ }
+ }
+ }
+
+ // Re-sort the partitions now that they have been split and merged into
+ // a disjoint set of partitions. Also remove any of the dead partitions we've
+ // replaced in the process.
+ std::sort(Partitions.begin(), Partitions.end());
+ if (NumDeadPartitions) {
+ assert(Partitions.back().isDead());
+ assert((ptrdiff_t)NumDeadPartitions ==
+ std::count(Partitions.begin(), Partitions.end(), Partitions.back()));
+ }
+ Partitions.erase(Partitions.end() - NumDeadPartitions, Partitions.end());
+}
+
+AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
+ :
+#ifndef NDEBUG
+ AI(AI),
+#endif
+ PointerEscapingInstr(0) {
+ PartitionBuilder PB(TD, AI, *this);
+ if (!PB())
+ return;
+
+ // Sort the partitions. This arranges for the offsets to be in ascending order,
+ // and the sizes to be in descending order.
+ std::sort(Partitions.begin(), Partitions.end());
+
+ // Remove any partitions from the back which are marked as dead.
+ while (!Partitions.empty() && Partitions.back().isDead())
+ Partitions.pop_back();
+
+ if (Partitions.size() > 1) {
+ // Intersect splittability for all partitions with equal offsets and sizes.
+ // Then remove all but the first so that we have a sequence of non-equal but
+ // potentially overlapping partitions.
+ for (iterator I = Partitions.begin(), J = I, E = Partitions.end(); I != E;
+ I = J) {
+ ++J;
+ while (J != E && *I == *J) {
+ I->IsSplittable &= J->IsSplittable;
+ ++J;
+ }
+ }
+ Partitions.erase(std::unique(Partitions.begin(), Partitions.end()),
+ Partitions.end());
+
+ // Split splittable and merge unsplittable partitions into a disjoint set
+ // of partitions over the used space of the allocation.
+ splitAndMergePartitions();
+ }
+
+ // Now build up the user lists for each of these disjoint partitions by
+ // re-walking the recursive users of the alloca.
+ Uses.resize(Partitions.size());
+ UseBuilder UB(TD, AI, *this);
+ UB();
+}
+
+Type *AllocaPartitioning::getCommonType(iterator I) const {
+ Type *Ty = 0;
+ for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
+ if (!UI->U)
+ continue; // Skip dead uses.
+ if (isa<IntrinsicInst>(*UI->U->getUser()))
+ continue;
+ if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
+ continue;
+
+ Type *UserTy = 0;
+ if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) {
+ UserTy = LI->getType();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) {
+ UserTy = SI->getValueOperand()->getType();
+ } else {
+ return 0; // Bail if we have weird uses.
+ }
+
+ if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
+ // If the type is larger than the partition, skip it. We only encounter
+ // this for split integer operations where we want to use the type of the
+ // entity causing the split.
+ if (ITy->getBitWidth() > (I->EndOffset - I->BeginOffset)*8)
+ continue;
+
+ // If we have found an integer type use covering the alloca, use that
+ // regardless of the other types, as integers are often used for a "bucket
+ // of bits" type.
+ return ITy;
+ }
+
+ if (Ty && Ty != UserTy)
+ return 0;
+
+ Ty = UserTy;
+ }
+ return Ty;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
+ StringRef Indent) const {
+ OS << Indent << "partition #" << (I - begin())
+ << " [" << I->BeginOffset << "," << I->EndOffset << ")"
+ << (I->IsSplittable ? " (splittable)" : "")
+ << (Uses[I - begin()].empty() ? " (zero uses)" : "")
+ << "\n";
+}
+
+void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
+ StringRef Indent) const {
+ for (const_use_iterator UI = use_begin(I), UE = use_end(I);
+ UI != UE; ++UI) {
+ if (!UI->U)
+ continue; // Skip dead uses.
+ OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") "
+ << "used by: " << *UI->U->getUser() << "\n";
+ if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) {
+ const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
+ bool IsDest;
+ if (!MTO.IsSplittable)
+ IsDest = UI->BeginOffset == MTO.DestBegin;
+ else
+ IsDest = MTO.DestBegin != 0u;
+ OS << Indent << " (original " << (IsDest ? "dest" : "source") << ": "
+ << "[" << (IsDest ? MTO.DestBegin : MTO.SourceBegin)
+ << "," << (IsDest ? MTO.DestEnd : MTO.SourceEnd) << ")\n";
+ }
+ }
+}
+
+void AllocaPartitioning::print(raw_ostream &OS) const {
+ if (PointerEscapingInstr) {
+ OS << "No partitioning for alloca: " << AI << "\n"
+ << " A pointer to this alloca escaped by:\n"
+ << " " << *PointerEscapingInstr << "\n";
+ return;
+ }
+
+ OS << "Partitioning of alloca: " << AI << "\n";
+ unsigned Num = 0;
+ for (const_iterator I = begin(), E = end(); I != E; ++I, ++Num) {
+ print(OS, I);
+ printUsers(OS, I);
+ }
+}
+
+void AllocaPartitioning::dump(const_iterator I) const { print(dbgs(), I); }
+void AllocaPartitioning::dump() const { print(dbgs()); }
+
+#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+
+namespace {
+/// \brief Implementation of LoadAndStorePromoter for promoting allocas.
+///
+/// This subclass of LoadAndStorePromoter adds overrides to handle promoting
+/// the loads and stores of an alloca instruction, as well as updating its
+/// debug information. This is used when a domtree is unavailable and thus
+/// mem2reg in its full form can't be used to handle promotion of allocas to
+/// scalar values.
+class AllocaPromoter : public LoadAndStorePromoter {
+ AllocaInst &AI;
+ DIBuilder &DIB;
+
+ SmallVector<DbgDeclareInst *, 4> DDIs;
+ SmallVector<DbgValueInst *, 4> DVIs;
+
+public:
+ AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ AllocaInst &AI, DIBuilder &DIB)
+ : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
+
+ void run(const SmallVectorImpl<Instruction*> &Insts) {
+ // Remember which alloca we're promoting (for isInstInList).
+ if (MDNode *DebugNode = MDNode::getIfExists(AI.getContext(), &AI)) {
+ for (Value::use_iterator UI = DebugNode->use_begin(),
+ UE = DebugNode->use_end();
+ UI != UE; ++UI)
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ DDIs.push_back(DDI);
+ else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
+ DVIs.push_back(DVI);
+ }
+
+ LoadAndStorePromoter::run(Insts);
+ AI.eraseFromParent();
+ while (!DDIs.empty())
+ DDIs.pop_back_val()->eraseFromParent();
+ while (!DVIs.empty())
+ DVIs.pop_back_val()->eraseFromParent();
+ }
+
+ virtual bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts) const {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getOperand(0) == &AI;
+ return cast<StoreInst>(I)->getPointerOperand() == &AI;
+ }
+
+ virtual void updateDebugInfo(Instruction *Inst) const {
+ for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
+ E = DDIs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
+ }
+ for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
+ E = DVIs.end(); I != E; ++I) {
+ DbgValueInst *DVI = *I;
+ Value *Arg = NULL;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // If the argument is zero- or sign-extended, use the argument directly;
+ // the extension may be zapped by an optimization pass in the future.
+ if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
+ Arg = dyn_cast<Argument>(ZExt->getOperand(0));
+ if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ Arg = dyn_cast<Argument>(SExt->getOperand(0));
+ if (!Arg)
+ Arg = SI->getOperand(0);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ Arg = LI->getOperand(0);
+ } else {
+ continue;
+ }
+ Instruction *DbgVal =
+ DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
+ Inst);
+ DbgVal->setDebugLoc(DVI->getDebugLoc());
+ }
+ }
+};
+} // end anon namespace
+
+
+namespace {
+/// \brief An optimization pass providing Scalar Replacement of Aggregates.
+///
+/// This pass takes allocations which can be completely analyzed (that is, they
+/// don't escape) and tries to turn them into scalar SSA values. There are
+/// a few steps to this process.
+///
+/// 1) It takes allocations of aggregates and analyzes the ways in which they
+/// are used to try to split them into smaller allocations, ideally of
+/// a single scalar data type. It will split up memcpy and memset accesses
+/// as necessary and try to isolate individual scalar accesses.
+/// 2) It will transform accesses into forms which are suitable for SSA value
+/// promotion. This can be replacing a memset with a scalar store of an
+/// integer value, or it can involve speculating operations on a PHI or
+/// select to be a PHI or select of the results.
+/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
+/// onto insert and extract operations on a vector value, and convert them to
+/// this form. By doing so, it will enable promotion of vector aggregates to
+/// SSA vector values.
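+///
+/// For illustration only (an example added to this description, not a claim
+/// about any particular input): an 'alloca {i32, float}' whose two fields are
+/// only ever loaded and stored individually would typically be split into an
+/// i32 alloca and a float alloca, each of which can then be promoted to an
+/// SSA value by the promotion step.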
+class SROA : public FunctionPass {
+ const bool RequiresDomTree;
+
+ LLVMContext *C;
+ const DataLayout *TD;
+ DominatorTree *DT;
+
+ /// \brief Worklist of alloca instructions to simplify.
+ ///
+ /// Each alloca in the function is added to this. Each new alloca formed gets
+ /// added to it as well to recursively simplify unless that alloca can be
+ /// directly promoted. Finally, each time we rewrite a use of an alloca other
+ /// the one being actively rewritten, we add it back onto the list if not
+ /// already present to ensure it is re-visited.
+ SetVector<AllocaInst *, SmallVector<AllocaInst *, 16> > Worklist;
+
+ /// \brief A collection of instructions to delete.
+ /// We try to batch deletions to simplify code and make things a bit more
+ /// efficient.
+ SetVector<Instruction *, SmallVector<Instruction *, 8> > DeadInsts;
+
+ /// \brief Post-promotion worklist.
+ ///
+ /// Sometimes we discover an alloca which has a high probability of becoming
+ /// viable for SROA after a round of promotion takes place. In those cases,
+ /// the alloca is enqueued here for re-processing.
+ ///
+ /// Note that we have to be very careful to clear allocas out of this list in
+ /// the event they are deleted.
+ SetVector<AllocaInst *, SmallVector<AllocaInst *, 16> > PostPromotionWorklist;
+
+ /// \brief A collection of alloca instructions we can directly promote.
+ std::vector<AllocaInst *> PromotableAllocas;
+
+public:
+ SROA(bool RequiresDomTree = true)
+ : FunctionPass(ID), RequiresDomTree(RequiresDomTree),
+ C(0), TD(0), DT(0) {
+ initializeSROAPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ const char *getPassName() const { return "SROA"; }
+ static char ID;
+
+private:
+ friend class PHIOrSelectSpeculator;
+ friend class AllocaPartitionRewriter;
+ friend class AllocaPartitionVectorRewriter;
+
+ bool rewriteAllocaPartition(AllocaInst &AI,
+ AllocaPartitioning &P,
+ AllocaPartitioning::iterator PI);
+ bool splitAlloca(AllocaInst &AI, AllocaPartitioning &P);
+ bool runOnAlloca(AllocaInst &AI);
+ void deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas);
+ bool promoteAllocas(Function &F);
+};
+}
+
+char SROA::ID = 0;
+
+FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
+ return new SROA(RequiresDomTree);
+}
+
+INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
+ false, false)
+
+namespace {
+/// \brief Visitor to speculate PHIs and Selects where possible.
+class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {
+ // Befriend the base class so it can delegate to private visit methods.
+ friend class llvm::InstVisitor<PHIOrSelectSpeculator>;
+
+ const DataLayout &TD;
+ AllocaPartitioning &P;
+ SROA &Pass;
+
+public:
+ PHIOrSelectSpeculator(const DataLayout &TD, AllocaPartitioning &P, SROA &Pass)
+ : TD(TD), P(P), Pass(Pass) {}
+
+ /// \brief Visit the users of an alloca partition and rewrite them.
+ void visitUsers(AllocaPartitioning::const_iterator PI) {
+ // Note that we need to use an index here as the underlying vector of uses
+ // may be grown during speculation. However, we never need to re-visit the
+ // new uses, and so we can use the initial size bound.
+ for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
+ const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
+ if (!PU.U)
+ continue; // Skip dead use.
+
+ visit(cast<Instruction>(PU.U->getUser()));
+ }
+ }
+
+private:
+ // By default, skip this instruction.
+ void visitInstruction(Instruction &I) {}
+
+ /// PHI instructions that use an alloca and are subsequently loaded can be
+ /// rewritten to load both input pointers in the pred blocks and then PHI the
+ /// results, allowing the load of the alloca to be promoted.
+ /// From this:
+ /// %P2 = phi [i32* %Alloca, i32* %Other]
+ /// %V = load i32* %P2
+ /// to:
+ /// %V1 = load i32* %Alloca -> will be mem2reg'd
+ /// ...
+ /// %V2 = load i32* %Other
+ /// ...
+ /// %V = phi [i32 %V1, i32 %V2]
+ ///
+ /// We can do this to a select if its only uses are loads and if the operands
+ /// to the select can be loaded unconditionally.
+ ///
+ /// FIXME: This should be hoisted into a generic utility, likely in
+ /// Transforms/Util/Local.h
+ bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *> &Loads) {
+ // For now, we can only do this promotion if the load is in the same block
+ // as the PHI, and if there are no stores between the phi and load.
+ // TODO: Allow recursive phi users.
+ // TODO: Allow stores.
+ BasicBlock *BB = PN.getParent();
+ unsigned MaxAlign = 0;
+ for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || !LI->isSimple()) return false;
+
+ // For now we only allow loads in the same block as the PHI. This is
+ // a common case that happens when instcombine merges two loads through
+ // a PHI.
+ if (LI->getParent() != BB) return false;
+
+ // Ensure that there are no instructions between the PHI and the load that
+ // could store.
+ for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ MaxAlign = std::max(MaxAlign, LI->getAlignment());
+ Loads.push_back(LI);
+ }
+
+ // We can only transform this if it is safe to push the loads into the
+ // predecessor blocks. The only thing to watch out for is that we can't put
+ // a possibly trapping load in the predecessor if it is a critical edge.
+ for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
+ ++Idx) {
+ TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
+ Value *InVal = PN.getIncomingValue(Idx);
+
+ // If the value is produced by the terminator of the predecessor (an
+ // invoke) or it has side-effects, there is no valid place to put a load
+ // in the predecessor.
+ if (TI == InVal || TI->mayHaveSideEffects())
+ return false;
+
+ // If the predecessor has a single successor, then the edge isn't
+ // critical.
+ if (TI->getNumSuccessors() == 1)
+ continue;
+
+ // If this pointer is always safe to load, or if we can prove that there
+ // is already a load in the block, then we can move the load to the pred
+ // block.
+ if (InVal->isDereferenceablePointer() ||
+ isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD))
+ continue;
+
+ return false;
+ }
+
+ return true;
+ }
+
+ void visitPHINode(PHINode &PN) {
+ DEBUG(dbgs() << " original: " << PN << "\n");
+
+ SmallVector<LoadInst *, 4> Loads;
+ if (!isSafePHIToSpeculate(PN, Loads))
+ return;
+
+ assert(!Loads.empty());
+
+ Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
+ IRBuilder<> PHIBuilder(&PN);
+ PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
+ PN.getName() + ".sroa.speculated");
+
+ // Get the TBAA tag and alignment to use from one of the loads. It doesn't
+ // matter which one we pick; any differences between them are irrelevant here.
+ LoadInst *SomeLoad = cast<LoadInst>(Loads.back());
+ MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+ unsigned Align = SomeLoad->getAlignment();
+
+ // Rewrite all loads of the PN to use the new PHI.
+ do {
+ LoadInst *LI = Loads.pop_back_val();
+ LI->replaceAllUsesWith(NewPN);
+ Pass.DeadInsts.insert(LI);
+ } while (!Loads.empty());
+
+ // Inject loads into all of the pred blocks.
+ for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
+ BasicBlock *Pred = PN.getIncomingBlock(Idx);
+ TerminatorInst *TI = Pred->getTerminator();
+ Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
+ Value *InVal = PN.getIncomingValue(Idx);
+ IRBuilder<> PredBuilder(TI);
+
+ LoadInst *Load
+ = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +
+ Pred->getName()));
+ ++NumLoadsSpeculated;
+ Load->setAlignment(Align);
+ if (TBAATag)
+ Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ NewPN->addIncoming(Load, Pred);
+
+ Instruction *Ptr = dyn_cast<Instruction>(InVal);
+ if (!Ptr)
+ // No uses to rewrite.
+ continue;
+
+ // Try to lookup and rewrite any partition uses corresponding to this phi
+ // input.
+ AllocaPartitioning::iterator PI
+ = P.findPartitionForPHIOrSelectOperand(InUse);
+ if (PI == P.end())
+ continue;
+
+ // Replace the Use in the PartitionUse for this operand with the Use
+ // inside the load.
+ AllocaPartitioning::use_iterator UI
+ = P.findPartitionUseForPHIOrSelectOperand(InUse);
+ assert(isa<PHINode>(*UI->U->getUser()));
+ UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
+ }
+ DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
+ }
+
+ /// Select instructions that use an alloca and are subsequently loaded can be
+ /// rewritten to load both input pointers and then select between the result,
+ /// allowing the load of the alloca to be promoted.
+ /// From this:
+ /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
+ /// %V = load i32* %P2
+ /// to:
+ /// %V1 = load i32* %Alloca -> will be mem2reg'd
+ /// %V2 = load i32* %Other
+ /// %V = select i1 %cond, i32 %V1, i32 %V2
+ ///
+ /// We can do this to a select if its only uses are loads and if the operand
+ /// to the select can be loaded unconditionally.
+ bool isSafeSelectToSpeculate(SelectInst &SI,
+ SmallVectorImpl<LoadInst *> &Loads) {
+ Value *TValue = SI.getTrueValue();
+ Value *FValue = SI.getFalseValue();
+ bool TDerefable = TValue->isDereferenceablePointer();
+ bool FDerefable = FValue->isDereferenceablePointer();
+
+ for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || !LI->isSimple()) return false;
+
+ // Both operands to the select need to be dereferenceable, either
+ // absolutely (e.g. allocas) or at this point because we can see other
+ // accesses to it.
+ if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI,
+ LI->getAlignment(), &TD))
+ return false;
+ if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI,
+ LI->getAlignment(), &TD))
+ return false;
+ Loads.push_back(LI);
+ }
+
+ return true;
+ }
+
+ void visitSelectInst(SelectInst &SI) {
+ DEBUG(dbgs() << " original: " << SI << "\n");
+ IRBuilder<> IRB(&SI);
+
+ // If the select isn't safe to speculate, just use simple logic to emit it.
+ SmallVector<LoadInst *, 4> Loads;
+ if (!isSafeSelectToSpeculate(SI, Loads))
+ return;
+
+ Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
+ AllocaPartitioning::iterator PIs[2];
+ AllocaPartitioning::PartitionUse PUs[2];
+ for (unsigned i = 0, e = 2; i != e; ++i) {
+ PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
+ if (PIs[i] != P.end()) {
+ // If the pointer is within the partitioning, remove the select from
+ // its uses. We'll add in the new loads below.
+ AllocaPartitioning::use_iterator UI
+ = P.findPartitionUseForPHIOrSelectOperand(Ops[i]);
+ PUs[i] = *UI;
+ // Clear out the use here so that the offsets into the use list remain
+ // stable but this use is ignored when rewriting.
+ UI->U = 0;
+ }
+ }
+
+ Value *TV = SI.getTrueValue();
+ Value *FV = SI.getFalseValue();
+ // Replace the loads of the select with a select of two loads.
+ while (!Loads.empty()) {
+ LoadInst *LI = Loads.pop_back_val();
+
+ IRB.SetInsertPoint(LI);
+ LoadInst *TL =
+ IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
+ LoadInst *FL =
+ IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");
+ NumLoadsSpeculated += 2;
+
+ // Transfer alignment and TBAA info if present.
+ TL->setAlignment(LI->getAlignment());
+ FL->setAlignment(LI->getAlignment());
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
+ TL->setMetadata(LLVMContext::MD_tbaa, Tag);
+ FL->setMetadata(LLVMContext::MD_tbaa, Tag);
+ }
+
+ Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
+ LI->getName() + ".sroa.speculated");
+
+ LoadInst *Loads[2] = { TL, FL };
+ for (unsigned i = 0, e = 2; i != e; ++i) {
+ if (PIs[i] != P.end()) {
+ Use *LoadUse = &Loads[i]->getOperandUse(0);
+ assert(PUs[i].U->get() == LoadUse->get());
+ PUs[i].U = LoadUse;
+ P.use_push_back(PIs[i], PUs[i]);
+ }
+ }
+
+ DEBUG(dbgs() << " speculated to: " << *V << "\n");
+ LI->replaceAllUsesWith(V);
+ Pass.DeadInsts.insert(LI);
+ }
+ }
+};
+}
+
+/// \brief Accumulate the constant offsets in a GEP into a single APInt offset.
+///
+/// If the provided GEP is all-constant, the total byte offset formed by the
+/// GEP is computed and Offset is set to it. If the GEP has any non-constant
+/// operands, the function returns false and the value of Offset is unmodified.
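+///
+/// For example (an illustration added here, assuming a typical 64-bit
+/// DataLayout): 'getelementptr {i32, [4 x i16]}* %p, i64 0, i32 1, i64 2'
+/// accumulates to a byte offset of 4 + 2*2 = 8.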
+static bool accumulateGEPOffsets(const DataLayout &TD, GEPOperator &GEP,
+ APInt &Offset) {
+ APInt GEPOffset(Offset.getBitWidth(), 0);
+ for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+ GTI != GTE; ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+ if (!OpC)
+ return false;
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = TD.getStructLayout(STy);
+ GEPOffset += APInt(Offset.getBitWidth(),
+ SL->getElementOffset(ElementIdx));
+ continue;
+ }
+
+ APInt TypeSize(Offset.getBitWidth(),
+ TD.getTypeAllocSize(GTI.getIndexedType()));
+ if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) {
+ assert((VTy->getScalarSizeInBits() % 8) == 0 &&
+ "vector element size is not a multiple of 8, cannot GEP over it");
+ TypeSize = VTy->getScalarSizeInBits() / 8;
+ }
+
+ GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) * TypeSize;
+ }
+ Offset = GEPOffset;
+ return true;
+}
+
+/// \brief Build a GEP out of a base pointer and indices.
+///
+/// This will return the BasePtr if that is valid, or build a new GEP
+/// instruction using the IRBuilder if GEP-ing is needed.
+static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
+ SmallVectorImpl<Value *> &Indices,
+ const Twine &Prefix) {
+ if (Indices.empty())
+ return BasePtr;
+
+ // A single zero index is a no-op, so check for this and avoid building a GEP
+ // in that case.
+ if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
+ return BasePtr;
+
+ return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");
+}
+
+/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
+/// TargetTy without changing the offset of the pointer.
+///
+/// This routine assumes we've already established a properly offset GEP with
+/// Indices, and arrived at the Ty type. The goal is to continue to GEP with
+/// zero-indices down through type layers until we find one the same as
+/// TargetTy. If we can't find one with the same type, we at least try to use
+/// one with the same size. If none of that works, we just produce the GEP as
+/// indicated by Indices to have the correct offset.
+static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
+ Value *BasePtr, Type *Ty, Type *TargetTy,
+ SmallVectorImpl<Value *> &Indices,
+ const Twine &Prefix) {
+ if (Ty == TargetTy)
+ return buildGEP(IRB, BasePtr, Indices, Prefix);
+
+ // See if we can descend into a struct and locate a field with the correct
+ // type.
+ unsigned NumLayers = 0;
+ Type *ElementTy = Ty;
+ do {
+ if (ElementTy->isPointerTy())
+ break;
+ if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {
+ ElementTy = SeqTy->getElementType();
+ // Note that we use the default address space as this index is over an
+ // array or a vector, not a pointer.
+ Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(0), 0)));
+ } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
+ if (STy->element_begin() == STy->element_end())
+ break; // Nothing left to descend into.
+ ElementTy = *STy->element_begin();
+ Indices.push_back(IRB.getInt32(0));
+ } else {
+ break;
+ }
+ ++NumLayers;
+ } while (ElementTy != TargetTy);
+ if (ElementTy != TargetTy)
+ Indices.erase(Indices.end() - NumLayers, Indices.end());
+
+ return buildGEP(IRB, BasePtr, Indices, Prefix);
+}
+
+/// \brief Recursively compute indices for a natural GEP.
+///
+/// This is the recursive step for getNaturalGEPWithOffset that walks down the
+/// element types adding appropriate indices for the GEP.
+static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
+ Value *Ptr, Type *Ty, APInt &Offset,
+ Type *TargetTy,
+ SmallVectorImpl<Value *> &Indices,
+ const Twine &Prefix) {
+ if (Offset == 0)
+ return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix);
+
+ // We can't recurse through pointer types.
+ if (Ty->isPointerTy())
+ return 0;
+
+ // We try to analyze GEPs over vectors here, but note that these GEPs are
+ // extremely poorly defined currently. The long-term goal is to remove GEPing
+ // over a vector from the IR completely.
+ if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
+ unsigned ElementSizeInBits = VecTy->getScalarSizeInBits();
+ if (ElementSizeInBits % 8)
+ return 0; // GEPs over non-multiple of 8 size vector elements are invalid.
+ APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
+ APInt NumSkippedElements = Offset.sdiv(ElementSize);
+ if (NumSkippedElements.ugt(VecTy->getNumElements()))
+ return 0;
+ Offset -= NumSkippedElements * ElementSize;
+ Indices.push_back(IRB.getInt(NumSkippedElements));
+ return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),
+ Offset, TargetTy, Indices, Prefix);
+ }
+
+ if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
+ Type *ElementTy = ArrTy->getElementType();
+ APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));
+ APInt NumSkippedElements = Offset.sdiv(ElementSize);
+ if (NumSkippedElements.ugt(ArrTy->getNumElements()))
+ return 0;
+
+ Offset -= NumSkippedElements * ElementSize;
+ Indices.push_back(IRB.getInt(NumSkippedElements));
+ return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
+ Indices, Prefix);
+ }
+
+ StructType *STy = dyn_cast<StructType>(Ty);
+ if (!STy)
+ return 0;
+
+ const StructLayout *SL = TD.getStructLayout(STy);
+ uint64_t StructOffset = Offset.getZExtValue();
+ if (StructOffset >= SL->getSizeInBytes())
+ return 0;
+ unsigned Index = SL->getElementContainingOffset(StructOffset);
+ Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
+ Type *ElementTy = STy->getElementType(Index);
+ if (Offset.uge(TD.getTypeAllocSize(ElementTy)))
+ return 0; // The offset points into alignment padding.
+
+ Indices.push_back(IRB.getInt32(Index));
+ return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
+ Indices, Prefix);
+}
+
+/// \brief Get a natural GEP from a base pointer to a particular offset and
+/// resulting in a particular type.
+///
+/// The goal is to produce a "natural" looking GEP that works with the existing
+/// composite types to arrive at the appropriate offset and element type for
+/// a pointer. TargetTy is the element type the returned GEP should point to if
+/// possible. We recurse by decreasing Offset, adding the appropriate index to
+/// Indices, and setting Ty to the result subtype.
+///
+/// If no natural GEP can be constructed, this function returns null.
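+///
+/// As a sketch (illustrative, not from the original description): for a base
+/// pointer of type [8 x i32]* with Offset 20 and TargetTy i32, this builds
+/// 'getelementptr inbounds [8 x i32]* %ptr, i64 0, i64 5', pointing at the
+/// sixth i32 element.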
+static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
+ Value *Ptr, APInt Offset, Type *TargetTy,
+ SmallVectorImpl<Value *> &Indices,
+ const Twine &Prefix) {
+ PointerType *Ty = cast<PointerType>(Ptr->getType());
+
+ // Don't consider any GEPs through an i8* as natural unless the TargetTy is
+ // an i8.
+ if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))
+ return 0;
+
+ Type *ElementTy = Ty->getElementType();
+ if (!ElementTy->isSized())
+ return 0; // We can't GEP through an unsized element.
+ APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));
+ if (ElementSize == 0)
+ return 0; // Zero-length arrays can't help us build a natural GEP.
+ APInt NumSkippedElements = Offset.sdiv(ElementSize);
+
+ Offset -= NumSkippedElements * ElementSize;
+ Indices.push_back(IRB.getInt(NumSkippedElements));
+ return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
+ Indices, Prefix);
+}
+
+/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
+/// resulting pointer has PointerTy.
+///
+/// This tries very hard to compute a "natural" GEP which arrives at the offset
+/// and produces the pointer type desired. Where it cannot, it will try to use
+/// the natural GEP to arrive at the offset and bitcast to the type. Where that
+/// fails, it will try to use an existing i8* and GEP to the byte offset and
+/// bitcast to the type.
+///
+/// The strategy for finding the more natural GEPs is to peel off layers of the
+/// pointer, walking back through bit casts and GEPs, searching for a base
+/// pointer from which we can compute a natural GEP with the desired
+/// properties. The algorithm tries to fold as many constant indices into
+/// a single GEP as possible, thus making each GEP more independent of the
+/// surrounding code.
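+///
+/// For instance (an illustration added here): when Ptr is
+/// 'bitcast [4 x i32]* %a to i8*' and Offset is 8 with an i32* PointerTy,
+/// the routine can peel the bitcast and return
+/// 'getelementptr inbounds [4 x i32]* %a, i64 0, i64 2' instead of raw i8
+/// pointer arithmetic followed by a bitcast.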
+static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
+ Value *Ptr, APInt Offset, Type *PointerTy,
+ const Twine &Prefix) {
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+ Visited.insert(Ptr);
+ SmallVector<Value *, 4> Indices;
+
+ // We may end up computing an offset pointer that has the wrong type. If we
+ // are never able to compute one directly that has the correct type, we'll
+ // fall back to it, so keep it around here.
+ Value *OffsetPtr = 0;
+
+ // Remember any i8 pointer we come across to re-use if we need to do a raw
+ // byte offset.
+ Value *Int8Ptr = 0;
+ APInt Int8PtrOffset(Offset.getBitWidth(), 0);
+
+ Type *TargetTy = PointerTy->getPointerElementType();
+
+ do {
+ // First fold any existing GEPs into the offset.
+ while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ APInt GEPOffset(Offset.getBitWidth(), 0);
+ if (!accumulateGEPOffsets(TD, *GEP, GEPOffset))
+ break;
+ Offset += GEPOffset;
+ Ptr = GEP->getPointerOperand();
+ if (!Visited.insert(Ptr))
+ break;
+ }
+
+ // See if we can perform a natural GEP here.
+ Indices.clear();
+ if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,
+ Indices, Prefix)) {
+ if (P->getType() == PointerTy) {
+ // Zap any offset pointer that we ended up computing in previous rounds.
+ if (OffsetPtr && OffsetPtr->use_empty())
+ if (Instruction *I = dyn_cast<Instruction>(OffsetPtr))
+ I->eraseFromParent();
+ return P;
+ }
+ if (!OffsetPtr) {
+ OffsetPtr = P;
+ }
+ }
+
+ // Stash this pointer if we've found an i8*.
+ if (Ptr->getType()->isIntegerTy(8)) {
+ Int8Ptr = Ptr;
+ Int8PtrOffset = Offset;
+ }
+
+ // Peel off a layer of the pointer and update the offset appropriately.
+ if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
+ Ptr = cast<Operator>(Ptr)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
+ if (GA->mayBeOverridden())
+ break;
+ Ptr = GA->getAliasee();
+ } else {
+ break;
+ }
+ assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(Ptr));
+
+ if (!OffsetPtr) {
+ if (!Int8Ptr) {
+ Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
+ Prefix + ".raw_cast");
+ Int8PtrOffset = Offset;
+ }
+
+ OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
+ IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
+ Prefix + ".raw_idx");
+ }
+ Ptr = OffsetPtr;
+
+ // On the off chance we were targeting i8*, guard the bitcast here.
+ if (Ptr->getType() != PointerTy)
+ Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");
+
+ return Ptr;
+}
+
+/// \brief Test whether we can convert a value from the old to the new type.
+///
+/// This predicate should be used to guard calls to convertValue in order to
+/// ensure that we only try to convert viable values. The strategy is that we
+/// will peel off single element struct and array wrappings to get to an
+/// underlying value, and convert that value.
+static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
+ if (OldTy == NewTy)
+ return true;
+ if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
+ return false;
+ if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
+ return false;
+
+ if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
+ if (NewTy->isPointerTy() && OldTy->isPointerTy())
+ return true;
+ if (NewTy->isIntegerTy() || OldTy->isIntegerTy())
+ return true;
+ return false;
+ }
+
+ return true;
+}
+
+/// \brief Generic routine to convert an SSA value to a value of a different
+/// type.
+///
+/// This will try various different casting techniques, such as bitcasts,
+/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
+/// two types for viability with this routine.
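+///
+/// For example (an illustration added here): converting an i64 to an i8*
+/// emits an inttoptr, i8* to i64 emits a ptrtoint, and any other viable pair
+/// (such as float and i32, or two pointer types) falls through to a bitcast.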
+static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+ Type *Ty) {
+ assert(canConvertValue(DL, V->getType(), Ty) &&
+ "Value not convertable to type");
+ if (V->getType() == Ty)
+ return V;
+ if (V->getType()->isIntegerTy() && Ty->isPointerTy())
+ return IRB.CreateIntToPtr(V, Ty);
+ if (V->getType()->isPointerTy() && Ty->isIntegerTy())
+ return IRB.CreatePtrToInt(V, Ty);
+
+ return IRB.CreateBitCast(V, Ty);
+}
+
+/// \brief Test whether the given alloca partition can be promoted to a vector.
+///
+/// This is a quick test to check whether we can rewrite a particular alloca
+/// partition (and its newly formed alloca) into a vector alloca with only
+/// whole-vector loads and stores such that it could be promoted to a vector
+/// SSA value. We can only ensure this for a limited set of operations, and we
+/// don't want to do the rewrites unless we are confident that the result will
+/// be promotable, so we have an early test here.
+static bool isVectorPromotionViable(const DataLayout &TD,
+ Type *AllocaTy,
+ AllocaPartitioning &P,
+ uint64_t PartitionBeginOffset,
+ uint64_t PartitionEndOffset,
+ AllocaPartitioning::const_use_iterator I,
+ AllocaPartitioning::const_use_iterator E) {
+ VectorType *Ty = dyn_cast<VectorType>(AllocaTy);
+ if (!Ty)
+ return false;
+
+ uint64_t VecSize = TD.getTypeSizeInBits(Ty);
+ uint64_t ElementSize = Ty->getScalarSizeInBits();
+
+ // While the definition of LLVM vectors is bitpacked, we don't support sizes
+ // that aren't byte sized.
+ if (ElementSize % 8)
+ return false;
+ assert((VecSize % 8) == 0 && "vector size not a multiple of element size?");
+ VecSize /= 8;
+ ElementSize /= 8;
+
+ for (; I != E; ++I) {
+ if (!I->U)
+ continue; // Skip dead use.
+
+ uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
+ uint64_t BeginIndex = BeginOffset / ElementSize;
+ if (BeginIndex * ElementSize != BeginOffset ||
+ BeginIndex >= Ty->getNumElements())
+ return false;
+ uint64_t EndOffset = I->EndOffset - PartitionBeginOffset;
+ uint64_t EndIndex = EndOffset / ElementSize;
+ if (EndIndex * ElementSize != EndOffset ||
+ EndIndex > Ty->getNumElements())
+ return false;
+
+ // FIXME: We should build shuffle vector instructions to handle
+ // non-element-sized accesses.
+ if ((EndOffset - BeginOffset) != ElementSize &&
+ (EndOffset - BeginOffset) != VecSize)
+ return false;
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ if (MI->isVolatile())
+ return false;
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ const AllocaPartitioning::MemTransferOffsets &MTO
+ = P.getMemTransferOffsets(*MTI);
+ if (!MTO.IsSplittable)
+ return false;
+ }
+ } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
+ // Disable vector promotion when there are loads or stores of an FCA.
+ return false;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ if (LI->isVolatile())
+ return false;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ if (SI->isVolatile())
+ return false;
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
+/// \brief Test whether the given alloca partition's integer operations can be
+/// widened to promotable ones.
+///
+/// This is a quick test to check whether we can rewrite the integer loads and
+/// stores to a particular alloca into wider loads and stores and be able to
+/// promote the resulting alloca.
+static bool isIntegerWideningViable(const DataLayout &TD,
+ Type *AllocaTy,
+ uint64_t AllocBeginOffset,
+ AllocaPartitioning &P,
+ AllocaPartitioning::const_use_iterator I,
+ AllocaPartitioning::const_use_iterator E) {
+ uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy);
+
+ // Don't try to handle allocas with bit-padding.
+ if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy))
+ return false;
+
+ // We need to ensure that an integer type with the appropriate bitwidth can
+ // be converted to the alloca type, whatever that is. We don't want to force
+ // the alloca itself to have an integer type if there is a more suitable one.
+ Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
+ if (!canConvertValue(TD, AllocaTy, IntTy) ||
+ !canConvertValue(TD, IntTy, AllocaTy))
+ return false;
+
+ uint64_t Size = TD.getTypeStoreSize(AllocaTy);
+
+ // Check the uses to ensure they are (likely) promotable integer uses.
+ // Also ensure that the alloca has a covering load or store. We don't want
+ // to widen the integer operations only to fail to promote due to some other
+ // unsplittable entry (which we may make splittable later).
+ bool WholeAllocaOp = false;
+ for (; I != E; ++I) {
+ if (!I->U)
+ continue; // Skip dead use.
+
+ uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
+ uint64_t RelEnd = I->EndOffset - AllocBeginOffset;
+
+ // We can't reasonably handle cases where the load or store extends past
+ // the end of the alloca's type and into its padding.
+ if (RelEnd > Size)
+ return false;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ if (LI->isVolatile())
+ return false;
+ if (RelBegin == 0 && RelEnd == Size)
+ WholeAllocaOp = true;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
+ if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy))
+ return false;
+ continue;
+ }
+ // Non-integer loads need to be convertible from the alloca type so that
+ // they are promotable.
+ if (RelBegin != 0 || RelEnd != Size ||
+ !canConvertValue(TD, AllocaTy, LI->getType()))
+ return false;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ Type *ValueTy = SI->getValueOperand()->getType();
+ if (SI->isVolatile())
+ return false;
+ if (RelBegin == 0 && RelEnd == Size)
+ WholeAllocaOp = true;
+ if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
+ if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy))
+ return false;
+ continue;
+ }
+ // Non-integer stores need to be convertible to the alloca type so that
+ // they are promotable.
+ if (RelBegin != 0 || RelEnd != Size ||
+ !canConvertValue(TD, ValueTy, AllocaTy))
+ return false;
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ if (MI->isVolatile())
+ return false;
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ const AllocaPartitioning::MemTransferOffsets &MTO
+ = P.getMemTransferOffsets(*MTI);
+ if (!MTO.IsSplittable)
+ return false;
+ }
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ return WholeAllocaOp;
+}
+
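+/// \brief Extract an integer of type Ty from the wider integer V at byte
+/// Offset. (Descriptive comment added for clarity.) On little-endian targets
+/// this is a logical shift right by 8*Offset followed by a truncate; on
+/// big-endian targets the shift amount is taken from the mirrored byte
+/// position instead.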
+static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+ IntegerType *Ty, uint64_t Offset,
+ const Twine &Name) {
+ DEBUG(dbgs() << " start: " << *V << "\n");
+ IntegerType *IntTy = cast<IntegerType>(V->getType());
+ assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
+ "Element extends past full value");
+ uint64_t ShAmt = 8*Offset;
+ if (DL.isBigEndian())
+ ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
+ if (ShAmt) {
+ V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
+ DEBUG(dbgs() << " shifted: " << *V << "\n");
+ }
+ assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
+ "Cannot extract to a larger integer!");
+ if (Ty != IntTy) {
+ V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
+ DEBUG(dbgs() << " trunced: " << *V << "\n");
+ }
+ return V;
+}
+
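+/// \brief Insert the integer V into the wider integer Old at byte Offset.
+/// (Descriptive comment added for clarity.) V is zero-extended to Old's
+/// width, shifted into position, and merged with a mask-and-or; for example,
+/// inserting an i8 at byte 1 of an i32 on a little-endian target shifts the
+/// byte left by 8 and clears bits 8-15 of Old before or'ing them together.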
+static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
+ Value *V, uint64_t Offset, const Twine &Name) {
+ IntegerType *IntTy = cast<IntegerType>(Old->getType());
+ IntegerType *Ty = cast<IntegerType>(V->getType());
+ assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
+ "Cannot insert a larger integer!");
+ DEBUG(dbgs() << " start: " << *V << "\n");
+ if (Ty != IntTy) {
+ V = IRB.CreateZExt(V, IntTy, Name + ".ext");
+ DEBUG(dbgs() << " extended: " << *V << "\n");
+ }
+ assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
+ "Element store outside of alloca store");
+ uint64_t ShAmt = 8*Offset;
+ if (DL.isBigEndian())
+ ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
+ if (ShAmt) {
+ V = IRB.CreateShl(V, ShAmt, Name + ".shift");
+ DEBUG(dbgs() << " shifted: " << *V << "\n");
+ }
+
+ if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
+ APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
+ Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
+ DEBUG(dbgs() << " masked: " << *Old << "\n");
+ V = IRB.CreateOr(Old, V, Name + ".insert");
+ DEBUG(dbgs() << " inserted: " << *V << "\n");
+ }
+ return V;
+}
+
+namespace {
+/// \brief Visitor to rewrite instructions using a partition of an alloca to
+/// use a new alloca.
+///
+/// Also implements the rewriting to vector-based accesses when the partition
+/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
+/// lives here.
+class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
+ bool> {
+ // Befriend the base class so it can delegate to private visit methods.
+ friend class llvm::InstVisitor<AllocaPartitionRewriter, bool>;
+
+ const DataLayout &TD;
+ AllocaPartitioning &P;
+ SROA &Pass;
+ AllocaInst &OldAI, &NewAI;
+ const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
+ Type *NewAllocaTy;
+
+ // If we are rewriting an alloca partition which can be written as pure
+ // vector operations, we stash extra information here. When VecTy is
+ // non-null, we have some strict guarantees about the rewritten alloca:
+ // - The new alloca is exactly the size of the vector type here.
+ // - The accesses all either map to the entire vector or to a single
+ // element.
+ // - Every accessing instruction is of a kind handled above in
+ // isVectorPromotionViable. Generally these are the same access kinds
+ // which are promotable via mem2reg.
+ VectorType *VecTy;
+ Type *ElementTy;
+ uint64_t ElementSize;
+
+ // This is a convenience and flag variable that will be null unless the new
+ // alloca's integer operations should be widened to this integer type due to
+ // passing isIntegerWideningViable above. If it is non-null, the desired
+ // integer type will be stored here for easy access during rewriting.
+ IntegerType *IntTy;
+
+ // The offset of the partition user currently being rewritten.
+ uint64_t BeginOffset, EndOffset;
+ Use *OldUse;
+ Instruction *OldPtr;
+
+ // The name prefix to use when rewriting instructions for this alloca.
+ std::string NamePrefix;
+
+public:
+ AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P,
+ AllocaPartitioning::iterator PI,
+ SROA &Pass, AllocaInst &OldAI, AllocaInst &NewAI,
+ uint64_t NewBeginOffset, uint64_t NewEndOffset)
+ : TD(TD), P(P), Pass(Pass),
+ OldAI(OldAI), NewAI(NewAI),
+ NewAllocaBeginOffset(NewBeginOffset),
+ NewAllocaEndOffset(NewEndOffset),
+ NewAllocaTy(NewAI.getAllocatedType()),
+ VecTy(), ElementTy(), ElementSize(), IntTy(),
+ BeginOffset(), EndOffset() {
+ }
+
+ /// \brief Visit the users of the alloca partition and rewrite them.
+ bool visitUsers(AllocaPartitioning::const_use_iterator I,
+ AllocaPartitioning::const_use_iterator E) {
+ if (isVectorPromotionViable(TD, NewAI.getAllocatedType(), P,
+ NewAllocaBeginOffset, NewAllocaEndOffset,
+ I, E)) {
+ ++NumVectorized;
+ VecTy = cast<VectorType>(NewAI.getAllocatedType());
+ ElementTy = VecTy->getElementType();
+ assert((VecTy->getScalarSizeInBits() % 8) == 0 &&
+ "Only multiple-of-8 sized vector elements are viable");
+ ElementSize = VecTy->getScalarSizeInBits() / 8;
+ } else if (isIntegerWideningViable(TD, NewAI.getAllocatedType(),
+ NewAllocaBeginOffset, P, I, E)) {
+ IntTy = Type::getIntNTy(NewAI.getContext(),
+ TD.getTypeSizeInBits(NewAI.getAllocatedType()));
+ }
+ bool CanSROA = true;
+ for (; I != E; ++I) {
+ if (!I->U)
+ continue; // Skip dead uses.
+ BeginOffset = I->BeginOffset;
+ EndOffset = I->EndOffset;
+ OldUse = I->U;
+ OldPtr = cast<Instruction>(I->U->get());
+ NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str();
+ CanSROA &= visit(cast<Instruction>(I->U->getUser()));
+ }
+ if (VecTy) {
+ assert(CanSROA);
+ VecTy = 0;
+ ElementTy = 0;
+ ElementSize = 0;
+ }
+ if (IntTy) {
+ assert(CanSROA);
+ IntTy = 0;
+ }
+ return CanSROA;
+ }
+
+private:
+ // Every instruction which can end up as a user must have a rewrite rule.
+ bool visitInstruction(Instruction &I) {
+ DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
+ llvm_unreachable("No rewrite rule for this instruction!");
+ }
+
+ Twine getName(const Twine &Suffix) {
+ return NamePrefix + Suffix;
+ }
+
+ Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
+ assert(BeginOffset >= NewAllocaBeginOffset);
+ APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
+ return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
+ }
+
+ /// \brief Compute suitable alignment to access an offset into the new alloca.
+ unsigned getOffsetAlign(uint64_t Offset) {
+ unsigned NewAIAlign = NewAI.getAlignment();
+ if (!NewAIAlign)
+ NewAIAlign = TD.getABITypeAlignment(NewAI.getAllocatedType());
+ return MinAlign(NewAIAlign, Offset);
+ }
+
+ /// \brief Compute suitable alignment to access this partition of the new
+ /// alloca.
+ unsigned getPartitionAlign() {
+ return getOffsetAlign(BeginOffset - NewAllocaBeginOffset);
+ }
+
+ /// \brief Compute suitable alignment to access a type at an offset of the
+ /// new alloca.
+ ///
+ /// \returns zero if the type's ABI alignment is a suitable alignment,
+ /// otherwise returns the maximal suitable alignment.
+ unsigned getOffsetTypeAlign(Type *Ty, uint64_t Offset) {
+ unsigned Align = getOffsetAlign(Offset);
+ return Align == TD.getABITypeAlignment(Ty) ? 0 : Align;
+ }
+
+ /// \brief Compute suitable alignment to access a type at the beginning of
+ /// this partition of the new alloca.
+ ///
+ /// See \c getOffsetTypeAlign for details; this routine delegates to it.
+ unsigned getPartitionTypeAlign(Type *Ty) {
+ return getOffsetTypeAlign(Ty, BeginOffset - NewAllocaBeginOffset);
+ }
+
+ ConstantInt *getIndex(IRBuilder<> &IRB, uint64_t Offset) {
+ assert(VecTy && "Can only call getIndex when rewriting a vector");
+ uint64_t RelOffset = Offset - NewAllocaBeginOffset;
+ assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
+ uint32_t Index = RelOffset / ElementSize;
+ assert(Index * ElementSize == RelOffset);
+ return IRB.getInt32(Index);
+ }
+
+ void deleteIfTriviallyDead(Value *V) {
+ Instruction *I = cast<Instruction>(V);
+ if (isInstructionTriviallyDead(I))
+ Pass.DeadInsts.insert(I);
+ }
+
+ Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
+ Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ if (LI.getType() == VecTy->getElementType() ||
+ BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
+ V = IRB.CreateExtractElement(V, getIndex(IRB, BeginOffset),
+ getName(".extract"));
+ }
+ return V;
+ }
+
+ Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+ assert(IntTy && "We cannot insert an integer to the alloca");
+ assert(!LI.isVolatile());
+ Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ V = convertValue(TD, IRB, V, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ if (Offset > 0 || EndOffset < NewAllocaEndOffset)
+ V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
+ getName(".extract"));
+ return V;
+ }
+
+ bool visitLoadInst(LoadInst &LI) {
+ DEBUG(dbgs() << " original: " << LI << "\n");
+ Value *OldOp = LI.getOperand(0);
+ assert(OldOp == OldPtr);
+ IRBuilder<> IRB(&LI);
+
+ uint64_t Size = EndOffset - BeginOffset;
+ bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of the original allocation, its behavior is undefined. Rather
+ // than trying to transform it, just replace it with undef.
+ // FIXME: We should do something more clever for functions being
+ // instrumented by asan.
+ // FIXME: Eventually, once ASan and friends can flush out bugs here, this
+ // should be transformed to a load of null making it unreachable.
+ uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType());
+ if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) {
+ LI.replaceAllUsesWith(UndefValue::get(LI.getType()));
+ Pass.DeadInsts.insert(&LI);
+ deleteIfTriviallyDead(OldOp);
+ DEBUG(dbgs() << " to: undef!!\n");
+ return true;
+ }
+
+ Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
+ : LI.getType();
+ bool IsPtrAdjusted = false;
+ Value *V;
+ if (VecTy) {
+ V = rewriteVectorizedLoadInst(IRB, LI, OldOp);
+ } else if (IntTy && LI.getType()->isIntegerTy()) {
+ V = rewriteIntegerLoad(IRB, LI);
+ } else if (BeginOffset == NewAllocaBeginOffset &&
+ canConvertValue(TD, NewAllocaTy, LI.getType())) {
+ V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ LI.isVolatile(), getName(".load"));
+ } else {
+ Type *LTy = TargetTy->getPointerTo();
+ V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
+ getPartitionTypeAlign(TargetTy),
+ LI.isVolatile(), getName(".load"));
+ IsPtrAdjusted = true;
+ }
+ V = convertValue(TD, IRB, V, TargetTy);
+
+ if (IsSplitIntLoad) {
+ assert(!LI.isVolatile());
+ assert(LI.getType()->isIntegerTy() &&
+ "Only integer type loads and stores are split");
+ assert(LI.getType()->getIntegerBitWidth() ==
+ TD.getTypeStoreSizeInBits(LI.getType()) &&
+ "Non-byte-multiple bit width");
+ assert(LI.getType()->getIntegerBitWidth() ==
+ TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
+ "Only alloca-wide loads can be split and recomposed");
+ // Move the insertion point just past the load so that we can refer to it.
+ IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
+ // Create a placeholder value with the same type as LI to use as the
+ // basis for the new value. This allows us to replace the uses of LI with
+ // the computed value, and then replace the placeholder with LI, leaving
+ // LI only used for this computation.
+ Value *Placeholder
+ = new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
+ V = insertInteger(TD, IRB, Placeholder, V, BeginOffset,
+ getName(".insert"));
+ LI.replaceAllUsesWith(V);
+ Placeholder->replaceAllUsesWith(&LI);
+ delete Placeholder;
+ } else {
+ LI.replaceAllUsesWith(V);
+ }
+
+ Pass.DeadInsts.insert(&LI);
+ deleteIfTriviallyDead(OldOp);
+ DEBUG(dbgs() << " to: " << *V << "\n");
+ return !LI.isVolatile() && !IsPtrAdjusted;
+ }
+
+ bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V,
+ StoreInst &SI, Value *OldOp) {
+ if (V->getType() == ElementTy ||
+ BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
+ if (V->getType() != ElementTy)
+ V = convertValue(TD, IRB, V, ElementTy);
+ LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
+ getName(".insert"));
+ } else if (V->getType() != VecTy) {
+ V = convertValue(TD, IRB, V, VecTy);
+ }
+ StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
+ Pass.DeadInsts.insert(&SI);
+
+ (void)Store;
+ DEBUG(dbgs() << " to: " << *Store << "\n");
+ return true;
+ }
+
+ bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) {
+ assert(IntTy && "We cannot extract an integer from the alloca");
+ assert(!SI.isVolatile());
+ if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset,
+ getName(".insert"));
+ }
+ V = convertValue(TD, IRB, V, NewAllocaTy);
+ StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
+ Pass.DeadInsts.insert(&SI);
+ (void)Store;
+ DEBUG(dbgs() << " to: " << *Store << "\n");
+ return true;
+ }
+
+ bool visitStoreInst(StoreInst &SI) {
+ DEBUG(dbgs() << " original: " << SI << "\n");
+ Value *OldOp = SI.getOperand(1);
+ assert(OldOp == OldPtr);
+ IRBuilder<> IRB(&SI);
+
+ Value *V = SI.getValueOperand();
+
+ // Strip all inbounds GEPs and pointer casts to try to dig out any root
+ // alloca that should be re-examined after promoting this alloca.
+ if (V->getType()->isPointerTy())
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
+ Pass.PostPromotionWorklist.insert(AI);
+
+ uint64_t Size = EndOffset - BeginOffset;
+ if (Size < TD.getTypeStoreSize(V->getType())) {
+ assert(!SI.isVolatile());
+ assert(V->getType()->isIntegerTy() &&
+ "Only integer type loads and stores are split");
+ assert(V->getType()->getIntegerBitWidth() ==
+ TD.getTypeStoreSizeInBits(V->getType()) &&
+ "Non-byte-multiple bit width");
+ assert(V->getType()->getIntegerBitWidth() ==
+ TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
+ "Only alloca-wide stores can be split and recomposed");
+ IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
+ V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
+ getName(".extract"));
+ }
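+ // For example (illustrative widths): an alloca-wide i64 store whose
+ // current partition covers only bytes [4,8) has an i32 extracted from the
+ // stored value at offset 4, and only that slice is stored below.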
+
+ if (VecTy)
+ return rewriteVectorizedStoreInst(IRB, V, SI, OldOp);
+ if (IntTy && V->getType()->isIntegerTy())
+ return rewriteIntegerStore(IRB, V, SI);
+
+ StoreInst *NewSI;
+ if (BeginOffset == NewAllocaBeginOffset &&
+ canConvertValue(TD, V->getType(), NewAllocaTy)) {
+ V = convertValue(TD, IRB, V, NewAllocaTy);
+ NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
+ SI.isVolatile());
+ } else {
+ Value *NewPtr = getAdjustedAllocaPtr(IRB, V->getType()->getPointerTo());
+ NewSI = IRB.CreateAlignedStore(V, NewPtr,
+ getPartitionTypeAlign(V->getType()),
+ SI.isVolatile());
+ }
+ (void)NewSI;
+ Pass.DeadInsts.insert(&SI);
+ deleteIfTriviallyDead(OldOp);
+
+ DEBUG(dbgs() << " to: " << *NewSI << "\n");
+ return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
+ }
+
+ bool visitMemSetInst(MemSetInst &II) {
+ DEBUG(dbgs() << " original: " << II << "\n");
+ IRBuilder<> IRB(&II);
+ assert(II.getRawDest() == OldPtr);
+
+ // If the memset has a variable size, it cannot be split; just adjust the
+ // pointer to the new alloca.
+ if (!isa<Constant>(II.getLength())) {
+ II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()));
+ Type *CstTy = II.getAlignmentCst()->getType();
+ II.setAlignment(ConstantInt::get(CstTy, getPartitionAlign()));
+
+ deleteIfTriviallyDead(OldPtr);
+ return false;
+ }
+
+ // Record this instruction for deletion.
+ Pass.DeadInsts.insert(&II);
+
+ Type *AllocaTy = NewAI.getAllocatedType();
+ Type *ScalarTy = AllocaTy->getScalarType();
+
+ // If this doesn't map cleanly onto the alloca type, and that type isn't
+ // a single value type, just emit a memset.
+ if (!VecTy && !IntTy &&
+ (BeginOffset != NewAllocaBeginOffset ||
+ EndOffset != NewAllocaEndOffset ||
+ !AllocaTy->isSingleValueType() ||
+ !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
+ Type *SizeTy = II.getLength()->getType();
+ Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
+ CallInst *New
+ = IRB.CreateMemSet(getAdjustedAllocaPtr(IRB,
+ II.getRawDest()->getType()),
+ II.getValue(), Size, getPartitionAlign(),
+ II.isVolatile());
+ (void)New;
+ DEBUG(dbgs() << " to: " << *New << "\n");
+ return false;
+ }
+
+ // If we can represent this as a simple value, we have to build the actual
+ // value to store, which requires expanding the byte passed to the memset
+ // into a sensible representation for the alloca type. This is essentially
+ // splatting the byte to a sufficiently wide integer, bitcasting to the
+ // desired scalar type, and splatting it across any desired vector type.
+ uint64_t Size = EndOffset - BeginOffset;
+ Value *V = II.getValue();
+ IntegerType *VTy = cast<IntegerType>(V->getType());
+ Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
+ if (Size*8 > VTy->getBitWidth())
+ V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
+ ConstantExpr::getUDiv(
+ Constant::getAllOnesValue(SplatIntTy),
+ ConstantExpr::getZExt(
+ Constant::getAllOnesValue(V->getType()),
+ SplatIntTy)),
+ getName(".isplat"));
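+ // The multiply above splats the single memset byte across the wider
+ // integer: the all-ones value of the wide type divided by the all-ones
+ // value of the byte type gives the 0x0101...01 pattern. As a worked
+ // example with illustrative values, splatting 0xAB to an i32:
+ //   0xFFFFFFFF udiv 0x000000FF = 0x01010101
+ //   0x000000AB  *   0x01010101 = 0xABABABAB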
+
+ // If this is an element-wide memset of a vectorizable alloca, insert it.
+ if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
+ EndOffset < NewAllocaEndOffset)) {
+ if (V->getType() != ScalarTy)
+ V = convertValue(TD, IRB, V, ScalarTy);
+ StoreInst *Store = IRB.CreateAlignedStore(
+ IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
+ NewAI.getAlignment(),
+ getName(".load")),
+ V, getIndex(IRB, BeginOffset),
+ getName(".insert")),
+ &NewAI, NewAI.getAlignment());
+ (void)Store;
+ DEBUG(dbgs() << " to: " << *Store << "\n");
+ return true;
+ }
+
+ // If this is a memset on an alloca where we can widen stores, insert the
+ // set integer.
+ if (IntTy && (BeginOffset > NewAllocaBeginOffset ||
+ EndOffset < NewAllocaEndOffset)) {
+ assert(!II.isVolatile());
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert"));
+ }
+
+ if (V->getType() != AllocaTy)
+ V = convertValue(TD, IRB, V, AllocaTy);
+
+ Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
+ II.isVolatile());
+ (void)New;
+ DEBUG(dbgs() << " to: " << *New << "\n");
+ return !II.isVolatile();
+ }
+
+ bool visitMemTransferInst(MemTransferInst &II) {
+ // Rewriting of memory transfer instructions can be a bit tricky. We break
+ // them into two categories: split intrinsics and unsplit intrinsics.
+
+ DEBUG(dbgs() << " original: " << II << "\n");
+ IRBuilder<> IRB(&II);
+
+ assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
+ bool IsDest = II.getRawDest() == OldPtr;
+
+ const AllocaPartitioning::MemTransferOffsets &MTO
+ = P.getMemTransferOffsets(II);
+
+ // Compute the relative offset within the transfer.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
+ : MTO.SourceBegin));
+
+ unsigned Align = II.getAlignment();
+ if (Align > 1)
+ Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
+ MinAlign(II.getAlignment(), getPartitionAlign()));
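+ // For instance (illustrative numbers): a transfer that was 16-byte
+ // aligned, rewritten for a partition beginning 4 bytes into it, can only
+ // be assumed 4-byte aligned, since MinAlign(4, MinAlign(16, 16)) == 4.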
+
+ // For unsplit intrinsics, we simply modify the source and destination
+ // pointers in place. This isn't just an optimization; it is a matter of
+ // correctness. With unsplit intrinsics we may be dealing with transfers
+ // within a single alloca before SROA ran, or with transfers that have
+ // a variable length. We may also be dealing with memmove instead of
+ // memcpy, and so simply updating the pointers is all that is necessary
+ // for us to update both the source and dest of a single call.
+ if (!MTO.IsSplittable) {
+ Value *OldOp = IsDest ? II.getRawDest() : II.getRawSource();
+ if (IsDest)
+ II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()));
+ else
+ II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType()));
+
+ Type *CstTy = II.getAlignmentCst()->getType();
+ II.setAlignment(ConstantInt::get(CstTy, Align));
+
+ DEBUG(dbgs() << " to: " << II << "\n");
+ deleteIfTriviallyDead(OldOp);
+ return false;
+ }
+ // For split transfer intrinsics we have an incredibly useful assurance:
+ // the source and destination do not reside within the same alloca, and at
+ // least one of them does not escape. This means that we can replace
+ // memmove with memcpy, and we don't need to worry about all manner of
+ // downsides to splitting and transforming the operations.
+
+ // If this doesn't map cleanly onto the alloca type, and that type isn't
+ // a single value type, just emit a memcpy.
+ bool EmitMemCpy
+ = !VecTy && !IntTy && (BeginOffset != NewAllocaBeginOffset ||
+ EndOffset != NewAllocaEndOffset ||
+ !NewAI.getAllocatedType()->isSingleValueType());
+
+ // If we're just going to emit a memcpy, the alloca hasn't changed, and the
+ // size hasn't been shrunk based on analysis of the viable range, this is
+ // a no-op.
+ if (EmitMemCpy && &OldAI == &NewAI) {
+ uint64_t OrigBegin = IsDest ? MTO.DestBegin : MTO.SourceBegin;
+ uint64_t OrigEnd = IsDest ? MTO.DestEnd : MTO.SourceEnd;
+ // Ensure the start lines up.
+ assert(BeginOffset == OrigBegin);
+ (void)OrigBegin;
+
+ // Rewrite the size as needed.
+ if (EndOffset != OrigEnd)
+ II.setLength(ConstantInt::get(II.getLength()->getType(),
+ EndOffset - BeginOffset));
+ return false;
+ }
+ // Record this instruction for deletion.
+ Pass.DeadInsts.insert(&II);
+
+ bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
+ EndOffset == NewAllocaEndOffset;
+ bool IsVectorElement = VecTy && !IsWholeAlloca;
+ uint64_t Size = EndOffset - BeginOffset;
+ IntegerType *SubIntTy
+ = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
+
+ Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
+ : II.getRawDest()->getType();
+ if (!EmitMemCpy) {
+ if (IsVectorElement)
+ OtherPtrTy = VecTy->getElementType()->getPointerTo();
+ else if (IntTy && !IsWholeAlloca)
+ OtherPtrTy = SubIntTy->getPointerTo();
+ else
+ OtherPtrTy = NewAI.getType();
+ }
+
+ // Compute the other pointer, folding as much as possible to produce
+ // a single, simple GEP in most cases.
+ Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
+ OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
+ getName("." + OtherPtr->getName()));
+
+ // Strip all inbounds GEPs and pointer casts to try to dig out any root
+ // alloca that should be re-examined after rewriting this instruction.
+ if (AllocaInst *AI
+ = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets()))
+ Pass.Worklist.insert(AI);
+
+ if (EmitMemCpy) {
+ Value *OurPtr
+ = getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType()
+ : II.getRawSource()->getType());
+ Type *SizeTy = II.getLength()->getType();
+ Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
+
+ CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr,
+ IsDest ? OtherPtr : OurPtr,
+ Size, Align, II.isVolatile());
+ (void)New;
+ DEBUG(dbgs() << " to: " << *New << "\n");
+ return false;
+ }
+
+ // Note that we clamp the alignment to 1 here as a 0 alignment for a memcpy
+ // is equivalent to 1, but that isn't true if we end up rewriting this as
+ // a load or store.
+ if (!Align)
+ Align = 1;
+
+ Value *SrcPtr = OtherPtr;
+ Value *DstPtr = &NewAI;
+ if (!IsDest)
+ std::swap(SrcPtr, DstPtr);
+
+ Value *Src;
+ if (IsVectorElement && !IsDest) {
+ // We have to extract rather than load.
+ Src = IRB.CreateExtractElement(
+ IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
+ getIndex(IRB, BeginOffset),
+ getName(".copyextract"));
+ } else if (IntTy && !IsWholeAlloca && !IsDest) {
+ Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ Src = convertValue(TD, IRB, Src, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract"));
+ } else {
+ Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
+ getName(".copyload"));
+ }
+
+ if (IntTy && !IsWholeAlloca && IsDest) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert"));
+ Src = convertValue(TD, IRB, Src, NewAllocaTy);
+ }
+
+ if (IsVectorElement && IsDest) {
+ // We have to insert into a loaded copy before storing.
+ Src = IRB.CreateInsertElement(
+ IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
+ Src, getIndex(IRB, BeginOffset),
+ getName(".insert"));
+ }
+
+ StoreInst *Store = cast<StoreInst>(
+ IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
+ (void)Store;
+ DEBUG(dbgs() << " to: " << *Store << "\n");
+ return !II.isVolatile();
+ }
+
+ bool visitIntrinsicInst(IntrinsicInst &II) {
+ assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
+ II.getIntrinsicID() == Intrinsic::lifetime_end);
+ DEBUG(dbgs() << " original: " << II << "\n");
+ IRBuilder<> IRB(&II);
+ assert(II.getArgOperand(1) == OldPtr);
+
+ // Record this instruction for deletion.
+ Pass.DeadInsts.insert(&II);
+
+ ConstantInt *Size
+ = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
+ EndOffset - BeginOffset);
+ Value *Ptr = getAdjustedAllocaPtr(IRB, II.getArgOperand(1)->getType());
+ Value *New;
+ if (II.getIntrinsicID() == Intrinsic::lifetime_start)
+ New = IRB.CreateLifetimeStart(Ptr, Size);
+ else
+ New = IRB.CreateLifetimeEnd(Ptr, Size);
+
+ DEBUG(dbgs() << " to: " << *New << "\n");
+ return true;
+ }
+
+ bool visitPHINode(PHINode &PN) {
+ DEBUG(dbgs() << " original: " << PN << "\n");
+
+ // We would like to compute a new pointer in only one place, but have it be
+ // as local as possible to the PHI. To do that, we re-use the location of
+ // the old pointer, which necessarily must be in the right position to
+ // dominate the PHI.
+ IRBuilder<> PtrBuilder(cast<Instruction>(OldPtr));
+
+ Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType());
+ // Replace the operands which were using the old pointer.
+ std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
+
+ DEBUG(dbgs() << " to: " << PN << "\n");
+ deleteIfTriviallyDead(OldPtr);
+ return false;
+ }
+
+ bool visitSelectInst(SelectInst &SI) {
+ DEBUG(dbgs() << " original: " << SI << "\n");
+ IRBuilder<> IRB(&SI);
+
+ // Find the operand we need to rewrite here.
+ bool IsTrueVal = SI.getTrueValue() == OldPtr;
+ if (IsTrueVal)
+ assert(SI.getFalseValue() != OldPtr && "Pointer is both operands!");
+ else
+ assert(SI.getFalseValue() == OldPtr && "Pointer isn't an operand!");
+
+ Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType());
+ SI.setOperand(IsTrueVal ? 1 : 2, NewPtr);
+ DEBUG(dbgs() << " to: " << SI << "\n");
+ deleteIfTriviallyDead(OldPtr);
+ return false;
+ }
+
+};
+}
+
+namespace {
+/// \brief Visitor to rewrite aggregate loads and stores as scalar.
+///
+/// This pass aggressively rewrites all aggregate loads and stores on
+/// a particular pointer (or any pointer derived from it which we can identify)
+/// with scalar loads and stores.
+class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
+ // Befriend the base class so it can delegate to private visit methods.
+ friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>;
+
+ const DataLayout &TD;
+
+ /// Queue of pointer uses to analyze and potentially rewrite.
+ SmallVector<Use *, 8> Queue;
+
+ /// Set to prevent us from cycling with phi nodes and loops.
+ SmallPtrSet<User *, 8> Visited;
+
+ /// The current pointer use being rewritten. This is used to dig up the used
+ /// value (as opposed to the user).
+ Use *U;
+
+public:
+ AggLoadStoreRewriter(const DataLayout &TD) : TD(TD) {}
+
+ /// Rewrite loads and stores through a pointer and all pointers derived from
+ /// it.
+ bool rewrite(Instruction &I) {
+ DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
+ enqueueUsers(I);
+ bool Changed = false;
+ while (!Queue.empty()) {
+ U = Queue.pop_back_val();
+ Changed |= visit(cast<Instruction>(U->getUser()));
+ }
+ return Changed;
+ }
+
+private:
+ /// Enqueue all the users of the given instruction for further processing.
+ /// This uses a set to de-duplicate users.
+ void enqueueUsers(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
+ ++UI)
+ if (Visited.insert(*UI))
+ Queue.push_back(&UI.getUse());
+ }
+
+ // Conservative default is to not rewrite anything.
+ bool visitInstruction(Instruction &I) { return false; }
+
+ /// \brief Generic recursive split emission class.
+ template <typename Derived>
+ class OpSplitter {
+ protected:
+ /// The builder used to form new instructions.
+ IRBuilder<> IRB;
+ /// The indices to be used with insertvalue or extractvalue to select the
+ /// appropriate value within the aggregate.
+ SmallVector<unsigned, 4> Indices;
+ /// The indices to a GEP instruction which will move Ptr to the correct slot
+ /// within the aggregate.
+ SmallVector<Value *, 4> GEPIndices;
+ /// The base pointer of the original op, used as a base for GEPing the
+ /// split operations.
+ Value *Ptr;
+
+ /// Initialize the splitter with an insertion point and a pointer, and
+ /// start with a single zero GEP index.
+ OpSplitter(Instruction *InsertionPoint, Value *Ptr)
+ : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {}
+
+ public:
+ /// \brief Generic recursive split emission routine.
+ ///
+ /// This method recursively splits an aggregate op (load or store) into
+ /// scalar or vector ops. It splits recursively until it hits a single value
+ /// and emits that single value operation via the template argument.
+ ///
+ /// The logic of this routine relies on GEPs and insertvalue and
+ /// extractvalue all operating with the same fundamental index list, merely
+ /// formatted differently (GEPs need actual values).
+ ///
+ /// \param Ty The type being split recursively into smaller ops.
+ /// \param Agg The aggregate value being built up or stored, depending on
+ /// whether this is splitting a load or a store respectively.
+ void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
+ if (Ty->isSingleValueType())
+ return static_cast<Derived *>(this)->emitFunc(Ty, Agg, Name);
+
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned OldSize = Indices.size();
+ (void)OldSize;
+ for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
+ ++Idx) {
+ assert(Indices.size() == OldSize && "Did not return to the old size");
+ Indices.push_back(Idx);
+ GEPIndices.push_back(IRB.getInt32(Idx));
+ emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
+ GEPIndices.pop_back();
+ Indices.pop_back();
+ }
+ return;
+ }
+
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ unsigned OldSize = Indices.size();
+ (void)OldSize;
+ for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
+ ++Idx) {
+ assert(Indices.size() == OldSize && "Did not return to the old size");
+ Indices.push_back(Idx);
+ GEPIndices.push_back(IRB.getInt32(Idx));
+ emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
+ GEPIndices.pop_back();
+ Indices.pop_back();
+ }
+ return;
+ }
+
+ llvm_unreachable("Only arrays and structs are aggregate loadable types");
+ }
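+ // As a sketch of the recursion (hypothetical type): splitting an op on
+ // { i32, [2 x float] } emits leaves for GEP indices [0,0], [0,1,0] and
+ // [0,1,1], with insertvalue/extractvalue index lists [0], [1,0] and [1,1]
+ // respectively.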
+ };
+
+ struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
+ LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr)
+ : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr) {}
+
+ /// Emit a leaf load of a single value. This is called at the leaves of the
+ /// recursive emission to actually load values.
+ void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
+ assert(Ty->isSingleValueType());
+ // Load the single value and insert it using the indices.
+ Value *Load = IRB.CreateLoad(IRB.CreateInBoundsGEP(Ptr, GEPIndices,
+ Name + ".gep"),
+ Name + ".load");
+ Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
+ DEBUG(dbgs() << " to: " << *Load << "\n");
+ }
+ };
+
+ bool visitLoadInst(LoadInst &LI) {
+ assert(LI.getPointerOperand() == *U);
+ if (!LI.isSimple() || LI.getType()->isSingleValueType())
+ return false;
+
+ // We have an aggregate being loaded, split it apart.
+ DEBUG(dbgs() << " original: " << LI << "\n");
+ LoadOpSplitter Splitter(&LI, *U);
+ Value *V = UndefValue::get(LI.getType());
+ Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
+ LI.replaceAllUsesWith(V);
+ LI.eraseFromParent();
+ return true;
+ }
+
+ struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
+ StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr)
+ : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr) {}
+
+ /// Emit a leaf store of a single value. This is called at the leaves of the
+ /// recursive emission to actually produce stores.
+ void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
+ assert(Ty->isSingleValueType());
+ // Extract the single value and store it using the indices.
+ Value *Store = IRB.CreateStore(
+ IRB.CreateExtractValue(Agg, Indices, Name + ".extract"),
+ IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"));
+ (void)Store;
+ DEBUG(dbgs() << " to: " << *Store << "\n");
+ }
+ };
+
+ bool visitStoreInst(StoreInst &SI) {
+ if (!SI.isSimple() || SI.getPointerOperand() != *U)
+ return false;
+ Value *V = SI.getValueOperand();
+ if (V->getType()->isSingleValueType())
+ return false;
+
+ // We have an aggregate being stored, split it apart.
+ DEBUG(dbgs() << " original: " << SI << "\n");
+ StoreOpSplitter Splitter(&SI, *U);
+ Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
+ SI.eraseFromParent();
+ return true;
+ }
+
+ bool visitBitCastInst(BitCastInst &BC) {
+ enqueueUsers(BC);
+ return false;
+ }
+
+ bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ enqueueUsers(GEPI);
+ return false;
+ }
+
+ bool visitPHINode(PHINode &PN) {
+ enqueueUsers(PN);
+ return false;
+ }
+
+ bool visitSelectInst(SelectInst &SI) {
+ enqueueUsers(SI);
+ return false;
+ }
+};
+}
+
+/// \brief Strip aggregate type wrapping.
+///
+/// This removes no-op aggregate types wrapping an underlying type. It will
+/// strip as many layers of types as it can without changing either the type
+/// size or the allocated size.
+static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
+ if (Ty->isSingleValueType())
+ return Ty;
+
+ uint64_t AllocSize = DL.getTypeAllocSize(Ty);
+ uint64_t TypeSize = DL.getTypeSizeInBits(Ty);
+
+ Type *InnerTy;
+ if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
+ InnerTy = ArrTy->getElementType();
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ unsigned Index = SL->getElementContainingOffset(0);
+ InnerTy = STy->getElementType(Index);
+ } else {
+ return Ty;
+ }
+
+ if (AllocSize > DL.getTypeAllocSize(InnerTy) ||
+ TypeSize > DL.getTypeSizeInBits(InnerTy))
+ return Ty;
+
+ return stripAggregateTypeWrapping(DL, InnerTy);
+}
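+// For illustration (hypothetical types): { { i32 } } strips down to i32 and
+// [1 x [1 x i64]] strips down to i64, since every layer has the same size,
+// while { i32, i8 } is returned unchanged because its first element is
+// smaller than the struct's allocated size.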
+
+/// \brief Try to find a partition of the aggregate type passed in for a given
+/// offset and size.
+///
+/// This recurses through the aggregate type and tries to compute a subtype
+/// based on the offset and size. When the offset and size span a sub-section
+/// of an array, it will even compute a new array type for that sub-section,
+/// and the same for structs.
+///
+/// Note that this routine is very strict and tries to find a partition of the
+/// type which produces the *exact* right offset and size. It is not forgiving
+/// when the size or offset causes either end of the type-based partition to
+/// be off.
+/// Also, this is a best-effort routine. It is reasonable to give up and not
+/// return a type if necessary.
+static Type *getTypePartition(const DataLayout &TD, Type *Ty,
+ uint64_t Offset, uint64_t Size) {
+ if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)
+ return stripAggregateTypeWrapping(TD, Ty);
+ if (Offset > TD.getTypeAllocSize(Ty) ||
+ (TD.getTypeAllocSize(Ty) - Offset) < Size)
+ return 0;
+
+ if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
+ // We can't partition pointers...
+ if (SeqTy->isPointerTy())
+ return 0;
+
+ Type *ElementTy = SeqTy->getElementType();
+ uint64_t ElementSize = TD.getTypeAllocSize(ElementTy);
+ uint64_t NumSkippedElements = Offset / ElementSize;
+ if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy))
+ if (NumSkippedElements >= ArrTy->getNumElements())
+ return 0;
+ if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy))
+ if (NumSkippedElements >= VecTy->getNumElements())
+ return 0;
+ Offset -= NumSkippedElements * ElementSize;
+
+ // First check if we need to recurse.
+ if (Offset > 0 || Size < ElementSize) {
+ // Bail if the partition ends in a different array element.
+ if ((Offset + Size) > ElementSize)
+ return 0;
+ // Recurse through the element type trying to peel off offset bytes.
+ return getTypePartition(TD, ElementTy, Offset, Size);
+ }
+ assert(Offset == 0);
+
+ if (Size == ElementSize)
+ return stripAggregateTypeWrapping(TD, ElementTy);
+ assert(Size > ElementSize);
+ uint64_t NumElements = Size / ElementSize;
+ if (NumElements * ElementSize != Size)
+ return 0;
+ return ArrayType::get(ElementTy, NumElements);
+ }
+
+ StructType *STy = dyn_cast<StructType>(Ty);
+ if (!STy)
+ return 0;
+
+ const StructLayout *SL = TD.getStructLayout(STy);
+ if (Offset >= SL->getSizeInBytes())
+ return 0;
+ uint64_t EndOffset = Offset + Size;
+ if (EndOffset > SL->getSizeInBytes())
+ return 0;
+
+ unsigned Index = SL->getElementContainingOffset(Offset);
+ Offset -= SL->getElementOffset(Index);
+
+ Type *ElementTy = STy->getElementType(Index);
+ uint64_t ElementSize = TD.getTypeAllocSize(ElementTy);
+ if (Offset >= ElementSize)
+ return 0; // The offset points into alignment padding.
+
+ // See if any partition must be contained by the element.
+ if (Offset > 0 || Size < ElementSize) {
+ if ((Offset + Size) > ElementSize)
+ return 0;
+ return getTypePartition(TD, ElementTy, Offset, Size);
+ }
+ assert(Offset == 0);
+
+ if (Size == ElementSize)
+ return stripAggregateTypeWrapping(TD, ElementTy);
+
+ StructType::element_iterator EI = STy->element_begin() + Index,
+ EE = STy->element_end();
+ if (EndOffset < SL->getSizeInBytes()) {
+ unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
+ if (Index == EndIndex)
+ return 0; // Within a single element and its padding.
+
+ // Don't try to form "natural" types if the elements don't line up with the
+ // expected size.
+ // FIXME: We could potentially recurse down through the last element in the
+ // sub-struct to find a natural end point.
+ if (SL->getElementOffset(EndIndex) != EndOffset)
+ return 0;
+
+ assert(Index < EndIndex);
+ EE = STy->element_begin() + EndIndex;
+ }
+
+ // Try to build up a sub-structure.
+ StructType *SubTy = StructType::get(STy->getContext(), makeArrayRef(EI, EE),
+ STy->isPacked());
+ const StructLayout *SubSL = TD.getStructLayout(SubTy);
+ if (Size != SubSL->getSizeInBytes())
+ return 0; // The sub-struct doesn't have quite the size needed.
+
+ return SubTy;
+}
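+// A small worked example (hypothetical type, usual i32 layout): for
+// { i32, i32, [8 x i8] }, offset 0 with size 8 yields the sub-struct
+// { i32, i32 }, offset 4 with size 4 yields i32, and offset 8 with size 4
+// yields [4 x i8], while offset 2 with size 4 straddles two elements and no
+// partition type is returned.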
+
+/// \brief Rewrite an alloca partition's users.
+///
+/// This routine drives both of the rewriting goals of the SROA pass. It tries
+/// to rewrite uses of an alloca partition to be conducive for SSA value
+/// promotion. If the partition needs a new, more refined alloca, this will
+/// build that new alloca, preserving as much type information as possible, and
+/// rewrite the uses of the old alloca to point at the new one and have the
+/// appropriate new offsets. It also evaluates how successful the rewrite was
+/// at enabling promotion and if it was successful queues the alloca to be
+/// promoted.
+bool SROA::rewriteAllocaPartition(AllocaInst &AI,
+ AllocaPartitioning &P,
+ AllocaPartitioning::iterator PI) {
+ uint64_t AllocaSize = PI->EndOffset - PI->BeginOffset;
+ bool IsLive = false;
+ for (AllocaPartitioning::use_iterator UI = P.use_begin(PI),
+ UE = P.use_end(PI);
+ UI != UE && !IsLive; ++UI)
+ if (UI->U)
+ IsLive = true;
+ if (!IsLive)
+ return false; // No live uses left of this partition.
+
+ DEBUG(dbgs() << "Speculating PHIs and selects in partition "
+ << "[" << PI->BeginOffset << "," << PI->EndOffset << ")\n");
+
+ PHIOrSelectSpeculator Speculator(*TD, P, *this);
+ DEBUG(dbgs() << " speculating ");
+ DEBUG(P.print(dbgs(), PI, ""));
+ Speculator.visitUsers(PI);
+
+ // Try to compute a friendly type for this partition of the alloca. This
+ // won't always succeed, in which case we fall back to a legal integer type
+ // or an i8 array of an appropriate size.
+ Type *AllocaTy = 0;
+ if (Type *PartitionTy = P.getCommonType(PI))
+ if (TD->getTypeAllocSize(PartitionTy) >= AllocaSize)
+ AllocaTy = PartitionTy;
+ if (!AllocaTy)
+ if (Type *PartitionTy = getTypePartition(*TD, AI.getAllocatedType(),
+ PI->BeginOffset, AllocaSize))
+ AllocaTy = PartitionTy;
+ if ((!AllocaTy ||
+ (AllocaTy->isArrayTy() &&
+ AllocaTy->getArrayElementType()->isIntegerTy())) &&
+ TD->isLegalInteger(AllocaSize * 8))
+ AllocaTy = Type::getIntNTy(*C, AllocaSize * 8);
+ if (!AllocaTy)
+ AllocaTy = ArrayType::get(Type::getInt8Ty(*C), AllocaSize);
+ assert(TD->getTypeAllocSize(AllocaTy) >= AllocaSize);
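+ // For example (hypothetical partitions): a 4-byte partition with no common
+ // type and no matching sub-type of the original alloca becomes an i32 on
+ // targets where i32 is a legal integer type, while a 20-byte partition
+ // falls back to [20 x i8] when i160 is not legal.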
+
+ // Check for the case where we're going to rewrite to a new alloca of the
+ // exact same type as the original, and with the same access offsets. In that
+ // case, re-use the existing alloca, but still run through the rewriter to
+ // perform phi and select speculation.
+ AllocaInst *NewAI;
+ if (AllocaTy == AI.getAllocatedType()) {
+ assert(PI->BeginOffset == 0 &&
+ "Non-zero begin offset but same alloca type");
+ assert(PI == P.begin() && "Begin offset is zero on later partition");
+ NewAI = &AI;
+ } else {
+ unsigned Alignment = AI.getAlignment();
+ if (!Alignment) {
+ // The minimum alignment which users can rely on when the explicit
+ // alignment is omitted or zero is that required by the ABI for this
+ // type.
+ Alignment = TD->getABITypeAlignment(AI.getAllocatedType());
+ }
+ Alignment = MinAlign(Alignment, PI->BeginOffset);
+ // If we will get at least this much alignment from the type alone, leave
+ // the alloca's alignment unconstrained.
+ if (Alignment <= TD->getABITypeAlignment(AllocaTy))
+ Alignment = 0;
+ NewAI = new AllocaInst(AllocaTy, 0, Alignment,
+ AI.getName() + ".sroa." + Twine(PI - P.begin()),
+ &AI);
+ ++NumNewAllocas;
+ }
+
+ DEBUG(dbgs() << "Rewriting alloca partition "
+ << "[" << PI->BeginOffset << "," << PI->EndOffset << ") to: "
+ << *NewAI << "\n");
+
+ // Track the high watermark of the post-promotion worklist. We will reset it
+ // to this point if the alloca is not in fact scheduled for promotion.
+ unsigned PPWOldSize = PostPromotionWorklist.size();
+
+ AllocaPartitionRewriter Rewriter(*TD, P, PI, *this, AI, *NewAI,
+ PI->BeginOffset, PI->EndOffset);
+ DEBUG(dbgs() << " rewriting ");
+ DEBUG(P.print(dbgs(), PI, ""));
+ bool Promotable = Rewriter.visitUsers(P.use_begin(PI), P.use_end(PI));
+ if (Promotable) {
+ DEBUG(dbgs() << " and queuing for promotion\n");
+ PromotableAllocas.push_back(NewAI);
+ } else if (NewAI != &AI) {
+ // If we can't promote the alloca, iterate on it to check for new
+ // refinements exposed by splitting the current alloca. Don't iterate on an
+ // alloca which didn't actually change and didn't get promoted.
+ Worklist.insert(NewAI);
+ }
+
+ // Drop any post-promotion work items if promotion didn't happen.
+ if (!Promotable)
+ while (PostPromotionWorklist.size() > PPWOldSize)
+ PostPromotionWorklist.pop_back();
+
+ return true;
+}
+
+/// \brief Walks the partitioning of an alloca, rewriting the uses of each
+/// partition.
+bool SROA::splitAlloca(AllocaInst &AI, AllocaPartitioning &P) {
+ bool Changed = false;
+ for (AllocaPartitioning::iterator PI = P.begin(), PE = P.end(); PI != PE;
+ ++PI)
+ Changed |= rewriteAllocaPartition(AI, P, PI);
+
+ return Changed;
+}
+
+/// \brief Analyze an alloca for SROA.
+///
+/// This analyzes the alloca to ensure we can reason about it, builds
+/// a partitioning of the alloca, and then hands it off to be split and
+/// rewritten as needed.
+bool SROA::runOnAlloca(AllocaInst &AI) {
+ DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
+ ++NumAllocasAnalyzed;
+
+ // Special case dead allocas, as they're trivial.
+ if (AI.use_empty()) {
+ AI.eraseFromParent();
+ return true;
+ }
+
+ // Skip alloca forms that this analysis can't handle.
+ if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
+ TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
+ return false;
+
+ bool Changed = false;
+
+ // First, split any FCA loads and stores touching this alloca to promote
+ // better splitting and promotion opportunities.
+ AggLoadStoreRewriter AggRewriter(*TD);
+ Changed |= AggRewriter.rewrite(AI);
+
+ // Build the partition set using a recursive instruction-visiting builder.
+ AllocaPartitioning P(*TD, AI);
+ DEBUG(P.print(dbgs()));
+ if (P.isEscaped())
+ return Changed;
+
+ // Delete all the dead users of this alloca before splitting and rewriting it.
+ for (AllocaPartitioning::dead_user_iterator DI = P.dead_user_begin(),
+ DE = P.dead_user_end();
+ DI != DE; ++DI) {
+ Changed = true;
+ (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
+ DeadInsts.insert(*DI);
+ }
+ for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(),
+ DE = P.dead_op_end();
+ DO != DE; ++DO) {
+ Value *OldV = **DO;
+ // Clobber the use with an undef value.
+ **DO = UndefValue::get(OldV->getType());
+ if (Instruction *OldI = dyn_cast<Instruction>(OldV))
+ if (isInstructionTriviallyDead(OldI)) {
+ Changed = true;
+ DeadInsts.insert(OldI);
+ }
+ }
+
+ // No partitions to split. Leave the dead alloca for a later pass to clean up.
+ if (P.begin() == P.end())
+ return Changed;
+
+ return splitAlloca(AI, P) || Changed;
+}
+
+/// \brief Delete the dead instructions accumulated in this run.
+///
+/// Recursively deletes the dead instructions we've accumulated. This is done
+/// at the very end to maximize locality of the recursive delete and to
+/// minimize the problems of invalidated instruction pointers as such pointers
+/// are used heavily in the intermediate stages of the algorithm.
+///
+/// We also record the alloca instructions deleted here so that they aren't
+/// subsequently handed to mem2reg to promote.
+void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
+ while (!DeadInsts.empty()) {
+ Instruction *I = DeadInsts.pop_back_val();
+ DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
+
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ // Zero out the operand and see if it becomes trivially dead.
+ *OI = 0;
+ if (isInstructionTriviallyDead(U))
+ DeadInsts.insert(U);
+ }
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ DeletedAllocas.insert(AI);
+
+ ++NumDeleted;
+ I->eraseFromParent();
+ }
+}
+
+/// \brief Promote the allocas, using the best available technique.
+///
+/// This attempts to promote whatever allocas have been identified as viable in
+/// the PromotableAllocas list. If that list is empty, there is nothing to do.
+/// If there is a domtree available, we attempt to promote using the full power
+/// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is
+/// based on the SSAUpdater utilities. This function returns whether any
+/// promotion occurred.
+bool SROA::promoteAllocas(Function &F) {
+ if (PromotableAllocas.empty())
+ return false;
+
+ NumPromoted += PromotableAllocas.size();
+
+ if (DT && !ForceSSAUpdater) {
+ DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
+ PromoteMemToReg(PromotableAllocas, *DT);
+ PromotableAllocas.clear();
+ return true;
+ }
+
+ DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
+ SSAUpdater SSA;
+ DIBuilder DIB(*F.getParent());
+ SmallVector<Instruction*, 64> Insts;
+
+ for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
+ AllocaInst *AI = PromotableAllocas[Idx];
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE;) {
+ Instruction *I = cast<Instruction>(*UI++);
+ // FIXME: Currently the SSAUpdater infrastructure doesn't reason about
+ // lifetime intrinsics and so we strip them (and the bitcasts+GEPs
+ // leading to them) here. Eventually it should use them to optimize the
+ // scalar values produced.
+ if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
+ assert(onlyUsedByLifetimeMarkers(I) &&
+ "Found a bitcast used outside of a lifetime marker.");
+ while (!I->use_empty())
+ cast<Instruction>(*I->use_begin())->eraseFromParent();
+ I->eraseFromParent();
+ continue;
+ }
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end);
+ II->eraseFromParent();
+ continue;
+ }
+
+ Insts.push_back(I);
+ }
+ AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
+ Insts.clear();
+ }
+
+ PromotableAllocas.clear();
+ return true;
+}
+
+namespace {
+ /// \brief A predicate to test whether an alloca belongs to a set.
+ class IsAllocaInSet {
+ typedef SmallPtrSet<AllocaInst *, 4> SetType;
+ const SetType &Set;
+
+ public:
+ typedef AllocaInst *argument_type;
+
+ IsAllocaInSet(const SetType &Set) : Set(Set) {}
+ bool operator()(AllocaInst *AI) const { return Set.count(AI); }
+ };
+}
+
+bool SROA::runOnFunction(Function &F) {
+ DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
+ C = &F.getContext();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ if (!TD) {
+ DEBUG(dbgs() << " Skipping SROA -- no target data!\n");
+ return false;
+ }
+ DT = getAnalysisIfAvailable<DominatorTree>();
+
+ BasicBlock &EntryBB = F.getEntryBlock();
+ for (BasicBlock::iterator I = EntryBB.begin(), E = llvm::prior(EntryBB.end());
+ I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ Worklist.insert(AI);
+
+ bool Changed = false;
+ // A set of deleted alloca instruction pointers which should be removed from
+ // the list of promotable allocas.
+ SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
+
+ do {
+ while (!Worklist.empty()) {
+ Changed |= runOnAlloca(*Worklist.pop_back_val());
+ deleteDeadInstructions(DeletedAllocas);
+
+ // Remove the deleted allocas from various lists so that we don't try to
+ // continue processing them.
+ if (!DeletedAllocas.empty()) {
+ Worklist.remove_if(IsAllocaInSet(DeletedAllocas));
+ PostPromotionWorklist.remove_if(IsAllocaInSet(DeletedAllocas));
+ PromotableAllocas.erase(std::remove_if(PromotableAllocas.begin(),
+ PromotableAllocas.end(),
+ IsAllocaInSet(DeletedAllocas)),
+ PromotableAllocas.end());
+ DeletedAllocas.clear();
+ }
+ }
+
+ Changed |= promoteAllocas(F);
+
+ Worklist = PostPromotionWorklist;
+ PostPromotionWorklist.clear();
+ } while (!Worklist.empty());
+
+ return Changed;
+}
+
+void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
+ if (RequiresDomTree)
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index 48318c8..39630fd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -19,7 +19,7 @@
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -59,6 +59,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeRegToMemPass(Registry);
initializeSCCPPass(Registry);
initializeIPSCCPPass(Registry);
+ initializeSROAPass(Registry);
initializeSROA_DTPass(Registry);
initializeSROA_SSAUpPass(Registry);
initializeCFGSimplifyPassPass(Registry);
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 6637126..a46d09c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -46,7 +46,7 @@
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -56,7 +56,6 @@ STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
-STATISTIC(NumGlobals, "Number of allocas copied from constant global");
namespace {
struct SROA : public FunctionPass {
@@ -88,7 +87,7 @@ namespace {
private:
bool HasDomTree;
- TargetData *TD;
+ DataLayout *TD;
/// DeadInsts - Keep track of instructions we have made dead, so that
/// we can remove them after we are done working.
@@ -183,9 +182,6 @@ namespace {
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
bool ShouldAttemptScalarRepl(AllocaInst *AI);
-
- static MemTransferInst *isOnlyCopiedFromConstantGlobal(
- AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
};
// SROA_DT - SROA that uses DominatorTree.
@@ -262,7 +258,7 @@ namespace {
class ConvertToScalarInfo {
/// AllocaSize - The size of the alloca being considered in bytes.
unsigned AllocaSize;
- const TargetData &TD;
+ const DataLayout &TD;
unsigned ScalarLoadThreshold;
/// IsNotTrivial - This is set to true if there is some access to the object
@@ -305,7 +301,7 @@ class ConvertToScalarInfo {
bool HadDynamicAccess;
public:
- explicit ConvertToScalarInfo(unsigned Size, const TargetData &td,
+ explicit ConvertToScalarInfo(unsigned Size, const DataLayout &td,
unsigned SLT)
: AllocaSize(Size), TD(td), ScalarLoadThreshold(SLT), IsNotTrivial(false),
ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false),
@@ -1024,11 +1020,11 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
bool SROA::runOnFunction(Function &F) {
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
bool Changed = performPromotion(F);
- // FIXME: ScalarRepl currently depends on TargetData more than it
+ // FIXME: ScalarRepl currently depends on DataLayout more than it
// theoretically needs to. It should be refactored in order to support
// target-independent IR. Until this is done, just skip the actual
// scalar-replacement portion of this pass.
@@ -1138,7 +1134,7 @@ public:
///
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
+static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *TD) {
bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
@@ -1176,7 +1172,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
///
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
-static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
+static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *TD) {
// For now, we can only do this promotion if the load is in the same block as
// the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
@@ -1240,7 +1236,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
/// direct (non-volatile) loads and stores to it. If the alloca is close but
/// not quite there, this will transform the code to allow promotion. As such,
/// it is a non-pure predicate.
-static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
SetVector<Instruction*, SmallVector<Instruction*, 4>,
SmallPtrSet<Instruction*, 4> > InstsToRewrite;
@@ -1465,26 +1461,6 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
return false;
}
-/// getPointeeAlignment - Compute the minimum alignment of the value pointed
-/// to by the given pointer.
-static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::BitCast ||
- (CE->getOpcode() == Instruction::GetElementPtr &&
- cast<GEPOperator>(CE)->hasAllZeroIndices()))
- return getPointeeAlignment(CE->getOperand(0), TD);
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- if (!GV->isDeclaration())
- return TD.getPreferredAlignment(GV);
-
- if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
- return TD.getABITypeAlignment(PT->getElementType());
-
- return 0;
-}
-
-
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the alloca instructions in the function, removing them
// if they are only used by getelementptr instructions.
@@ -1516,29 +1492,6 @@ bool SROA::performScalarRepl(Function &F) {
if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
continue;
- // Check to see if this allocation is only modified by a memcpy/memmove from
- // a constant global whose alignment is equal to or exceeds that of the
- // allocation. If this is the case, we can change all users to use
- // the constant global instead. This is commonly produced by the CFE by
- // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
- // is only subsequently read.
- SmallVector<Instruction *, 4> ToDelete;
- if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
- if (AI->getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
- DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
- DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
- for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
- ToDelete[i]->eraseFromParent();
- Constant *TheSrc = cast<Constant>(Copy->getSource());
- AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
- Copy->eraseFromParent(); // Don't mutate the global.
- AI->eraseFromParent();
- ++NumGlobals;
- Changed = true;
- continue;
- }
- }
-
// Check to see if we can perform the core SROA transformation. We cannot
// transform the allocation instruction if it is an array allocation
// (allocations OF arrays are ok though), and an allocation of a scalar
@@ -2584,7 +2537,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding in between the elements that would be split apart
/// by SROA; return false otherwise.
-static bool HasPadding(Type *Ty, const TargetData &TD) {
+static bool HasPadding(Type *Ty, const DataLayout &TD) {
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Ty = ATy->getElementType();
return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
@@ -2656,134 +2609,3 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
return true;
}
-
-
-
-/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
-/// some part of a constant global variable. This intentionally only accepts
-/// constant expressions because we don't can't rewrite arbitrary instructions.
-static bool PointsToConstantGlobal(Value *V) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- return GV->isConstant();
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::BitCast ||
- CE->getOpcode() == Instruction::GetElementPtr)
- return PointsToConstantGlobal(CE->getOperand(0));
- return false;
-}
-
-/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
-/// pointer to an alloca. Ignore any reads of the pointer, return false if we
-/// see any stores or other unknown uses. If we see pointer arithmetic, keep
-/// track of whether it moves the pointer (with isOffset) but otherwise traverse
-/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
-/// the alloca, and if the source pointer is a pointer to a constant global, we
-/// can optimize this.
-static bool
-isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
- bool isOffset,
- SmallVector<Instruction *, 4> &LifetimeMarkers) {
- // We track lifetime intrinsics as we encounter them. If we decide to go
- // ahead and replace the value with the global, this lets the caller quickly
- // eliminate the markers.
-
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
- User *U = cast<Instruction>(*UI);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- // Ignore non-volatile loads, they are always ok.
- if (!LI->isSimple()) return false;
- continue;
- }
-
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
- // If uses of the bitcast are ok, we are ok.
- if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
- LifetimeMarkers))
- return false;
- continue;
- }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
- // If the GEP has all zero indices, it doesn't offset the pointer. If it
- // doesn't, it does.
- if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
- isOffset || !GEP->hasAllZeroIndices(),
- LifetimeMarkers))
- return false;
- continue;
- }
-
- if (CallSite CS = U) {
- // If this is the function being called then we treat it like a load and
- // ignore it.
- if (CS.isCallee(UI))
- continue;
-
- // If this is a readonly/readnone call site, then we know it is just a
- // load (but one that potentially returns the value itself), so we can
- // ignore it if we know that the value isn't captured.
- unsigned ArgNo = CS.getArgumentNo(UI);
- if (CS.onlyReadsMemory() &&
- (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
- continue;
-
- // If this is being passed as a byval argument, the caller is making a
- // copy, so it is only a read of the alloca.
- if (CS.isByValArgument(ArgNo))
- continue;
- }
-
- // Lifetime intrinsics can be handled by the caller.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
- assert(II->use_empty() && "Lifetime markers have no result to use!");
- LifetimeMarkers.push_back(II);
- continue;
- }
- }
-
- // If this is isn't our memcpy/memmove, reject it as something we can't
- // handle.
- MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
- if (MI == 0)
- return false;
-
- // If the transfer is using the alloca as a source of the transfer, then
- // ignore it since it is a load (unless the transfer is volatile).
- if (UI.getOperandNo() == 1) {
- if (MI->isVolatile()) return false;
- continue;
- }
-
- // If we already have seen a copy, reject the second one.
- if (TheCopy) return false;
-
- // If the pointer has been offset from the start of the alloca, we can't
- // safely handle this.
- if (isOffset) return false;
-
- // If the memintrinsic isn't using the alloca as the dest, reject it.
- if (UI.getOperandNo() != 0) return false;
-
- // If the source of the memcpy/move is not a constant global, reject it.
- if (!PointsToConstantGlobal(MI->getSource()))
- return false;
-
- // Otherwise, the transform is safe. Remember the copy instruction.
- TheCopy = MI;
- }
- return true;
-}
-
-/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
-/// modified by a copy from a constant global. If we can prove this, we can
-/// replace any uses of the alloca with uses of the global directly.
-MemTransferInst *
-SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
- SmallVector<Instruction*, 4> &ToDelete) {
- MemTransferInst *TheCopy = 0;
- if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
- return TheCopy;
- return 0;
-}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index d13e4ab..9f24bb6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -31,10 +31,11 @@
#include "llvm/Attributes.h"
#include "llvm/Support/CFG.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/TargetTransformInfo.h"
using namespace llvm;
STATISTIC(NumSimpl, "Number of blocks simplified");
@@ -59,9 +60,9 @@ FunctionPass *llvm::createCFGSimplificationPass() {
return new CFGSimplifyPass();
}
-/// ChangeToUnreachable - Insert an unreachable instruction before the specified
+/// changeToUnreachable - Insert an unreachable instruction before the specified
/// instruction, making it and the rest of the code in the block dead.
-static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
BasicBlock *BB = I->getParent();
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
@@ -87,8 +88,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
}
}
-/// ChangeToCall - Convert the specified invoke into a normal call.
-static void ChangeToCall(InvokeInst *II) {
+/// changeToCall - Convert the specified invoke into a normal call.
+static void changeToCall(InvokeInst *II) {
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
NewCall->takeName(II);
@@ -105,7 +106,7 @@ static void ChangeToCall(InvokeInst *II) {
II->eraseFromParent();
}
-static bool MarkAliveBlocks(BasicBlock *BB,
+static bool markAliveBlocks(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 128> &Reachable) {
SmallVector<BasicBlock*, 128> Worklist;
@@ -129,7 +130,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
++BBI;
if (!isa<UnreachableInst>(BBI)) {
// Don't insert a call to llvm.trap right before the unreachable.
- ChangeToUnreachable(BBI, false);
+ changeToUnreachable(BBI, false);
Changed = true;
}
break;
@@ -148,7 +149,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
if (isa<UndefValue>(Ptr) ||
(isa<ConstantPointerNull>(Ptr) &&
SI->getPointerAddressSpace() == 0)) {
- ChangeToUnreachable(SI, true);
+ changeToUnreachable(SI, true);
Changed = true;
break;
}
@@ -159,7 +160,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
Value *Callee = II->getCalledValue();
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
- ChangeToUnreachable(II, true);
+ changeToUnreachable(II, true);
Changed = true;
} else if (II->doesNotThrow()) {
if (II->use_empty() && II->onlyReadsMemory()) {
@@ -168,7 +169,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
II->getUnwindDest()->removePredecessor(II->getParent());
II->eraseFromParent();
} else
- ChangeToCall(II);
+ changeToCall(II);
Changed = true;
}
}
@@ -180,12 +181,12 @@ static bool MarkAliveBlocks(BasicBlock *BB,
return Changed;
}
-/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
/// if they are in a dead cycle. Return true if a change was made, false
/// otherwise.
-static bool RemoveUnreachableBlocksFromFn(Function &F) {
+static bool removeUnreachableBlocksFromFn(Function &F) {
SmallPtrSet<BasicBlock*, 128> Reachable;
- bool Changed = MarkAliveBlocks(F.begin(), Reachable);
+ bool Changed = markAliveBlocks(F.begin(), Reachable);
// If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
@@ -215,9 +216,9 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) {
return true;
}
-/// MergeEmptyReturnBlocks - If we have more than one empty (other than phi
+/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
/// node) return blocks, merge them together to promote recursive block merging.
-static bool MergeEmptyReturnBlocks(Function &F) {
+static bool mergeEmptyReturnBlocks(Function &F) {
bool Changed = false;
BasicBlock *RetBlock = 0;
@@ -291,9 +292,10 @@ static bool MergeEmptyReturnBlocks(Function &F) {
return Changed;
}
-/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
-static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
+static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD,
+ const TargetTransformInfo *TTI) {
bool Changed = false;
bool LocalChange = true;
while (LocalChange) {
@@ -302,7 +304,7 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(BBIt++, TD)) {
+ if (SimplifyCFG(BBIt++, TD, TTI)) {
LocalChange = true;
++NumSimpl;
}
@@ -316,25 +318,27 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
// simplify the CFG.
//
bool CFGSimplifyPass::runOnFunction(Function &F) {
- const TargetData *TD = getAnalysisIfAvailable<TargetData>();
- bool EverChanged = RemoveUnreachableBlocksFromFn(F);
- EverChanged |= MergeEmptyReturnBlocks(F);
- EverChanged |= IterativeSimplifyCFG(F, TD);
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ const TargetTransformInfo *TTI =
+ getAnalysisIfAvailable<TargetTransformInfo>();
+ bool EverChanged = removeUnreachableBlocksFromFn(F);
+ EverChanged |= mergeEmptyReturnBlocks(F);
+ EverChanged |= iterativelySimplifyCFG(F, TD, TTI);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
- // IterativeSimplifyCFG can (rarely) make some loops dead. If this happens,
- // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should
+ // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens,
+ // removeUnreachableBlocksFromFn is needed to nuke them, which means we should
// iterate between the two optimizations. We structure the code like this to
- // avoid reruning IterativeSimplifyCFG if the second pass of
- // RemoveUnreachableBlocksFromFn doesn't do anything.
- if (!RemoveUnreachableBlocksFromFn(F))
+ // avoid rerunning iterativelySimplifyCFG if the second pass of
+ // removeUnreachableBlocksFromFn doesn't do anything.
+ if (!removeUnreachableBlocksFromFn(F))
return true;
do {
- EverChanged = IterativeSimplifyCFG(F, TD);
- EverChanged |= RemoveUnreachableBlocksFromFn(F);
+ EverChanged = iterativelySimplifyCFG(F, TD, TTI);
+ EverChanged |= removeUnreachableBlocksFromFn(F);
} while (EverChanged);
return true;
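
The body above is the entire fixed-point driver: run the cleanups once, and only when a second unreachable-block sweep still finds work does it keep alternating. A minimal stand-alone sketch of that control flow, with the two transforms reduced to hypothetical zero-argument callbacks (not the pass's real signatures), could read:

    // Simplify and RemoveDead stand in for iterativelySimplifyCFG and
    // removeUnreachableBlocksFromFn; each returns true when it changed something.
    static bool runToFixedPoint(bool (*Simplify)(), bool (*RemoveDead)()) {
      bool Ever = RemoveDead();
      Ever |= Simplify();
      if (!Ever)
        return false;          // neither cleanup changed anything
      if (!RemoveDead())
        return true;           // simplification exposed no newly dead blocks
      bool Changed;
      do {                     // otherwise alternate until both are quiescent
        Changed = Simplify();
        Changed |= RemoveDead();
      } while (Changed);
      return true;
    }

Structuring it this way avoids rerunning the iterative simplification when the follow-up unreachable-block sweep turns out to be a no-op.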
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index f110320..17d07cd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -28,9 +28,10 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host!
using namespace llvm;
@@ -38,6 +39,10 @@ using namespace llvm;
STATISTIC(NumSimplified, "Number of library calls simplified");
STATISTIC(NumAnnotated, "Number of attributes added to library functions");
+static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
+ cl::init(false),
+ cl::desc("Enable unsafe double to float "
+ "shrinking for math lib calls"));
//===----------------------------------------------------------------------===//
// Optimizer Base Class
//===----------------------------------------------------------------------===//
@@ -48,7 +53,7 @@ namespace {
class LibCallOptimization {
protected:
Function *Caller;
- const TargetData *TD;
+ const DataLayout *TD;
const TargetLibraryInfo *TLI;
LLVMContext* Context;
public:
@@ -63,7 +68,7 @@ public:
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
=0;
- Value *OptimizeCall(CallInst *CI, const TargetData *TD,
+ Value *OptimizeCall(CallInst *CI, const DataLayout *TD,
const TargetLibraryInfo *TLI, IRBuilder<> &B) {
Caller = CI->getParent()->getParent();
this->TD = TD;
@@ -85,22 +90,6 @@ public:
// Helper Functions
//===----------------------------------------------------------------------===//
-/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
-/// value is equal or not-equal to zero.
-static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
- UI != E; ++UI) {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
- if (IC->isEquality())
- if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
- if (C->isNullValue())
- continue;
- // Unknown instruction.
- return false;
- }
- return true;
-}
-
static bool CallHasFloatingPointArgument(const CallInst *CI) {
for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
it != e; ++it) {
@@ -110,799 +99,62 @@ static bool CallHasFloatingPointArgument(const CallInst *CI) {
return false;
}
-/// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality
-/// comparisons with With.
-static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) {
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
- UI != E; ++UI) {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
- if (IC->isEquality() && IC->getOperand(1) == With)
- continue;
- // Unknown instruction.
- return false;
- }
- return true;
-}
-
+namespace {
//===----------------------------------------------------------------------===//
-// String and Memory LibCall Optimizations
+// Math Library Optimizations
//===----------------------------------------------------------------------===//
//===---------------------------------------===//
-// 'strcat' Optimizations
-namespace {
-struct StrCatOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strcat" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- FT->getParamType(1) != FT->getReturnType())
- return 0;
-
- // Extract some information from the instruction
- Value *Dst = CI->getArgOperand(0);
- Value *Src = CI->getArgOperand(1);
-
- // See if we can get the length of the input string.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
- --Len; // Unbias length.
-
- // Handle the simple, do-nothing case: strcat(x, "") -> x
- if (Len == 0)
- return Dst;
-
- // These optimizations require TargetData.
- if (!TD) return 0;
-
- return EmitStrLenMemCpy(Src, Dst, Len, B);
- }
-
- Value *EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
- // We need to find the end of the destination string. That's where the
- // memory is to be moved to. We just generate a call to strlen.
- Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
- if (!DstLen)
- return 0;
-
- // Now that we have the destination's length, we must index into the
- // destination's pointer to get the actual memcpy destination (end of
- // the string .. we're concatenating).
- Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
-
- // We have enough information to now generate the memcpy call to do the
- // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(CpyDst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
- return Dst;
- }
-};
-
-//===---------------------------------------===//
-// 'strncat' Optimizations
-
-struct StrNCatOpt : public StrCatOpt {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strncat" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- FT->getParamType(1) != FT->getReturnType() ||
- !FT->getParamType(2)->isIntegerTy())
- return 0;
-
- // Extract some information from the instruction
- Value *Dst = CI->getArgOperand(0);
- Value *Src = CI->getArgOperand(1);
- uint64_t Len;
-
- // We don't do anything if length is not constant
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
- Len = LengthArg->getZExtValue();
- else
- return 0;
-
- // See if we can get the length of the input string.
- uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0) return 0;
- --SrcLen; // Unbias length.
-
- // Handle the simple, do-nothing cases:
- // strncat(x, "", c) -> x
- // strncat(x, c, 0) -> x
- if (SrcLen == 0 || Len == 0) return Dst;
-
- // These optimizations require TargetData.
- if (!TD) return 0;
-
- // We don't optimize this case
- if (Len < SrcLen) return 0;
-
- // strncat(x, s, c) -> strcat(x, s)
- // s is constant so the strcat can be optimized further
- return EmitStrLenMemCpy(Src, Dst, SrcLen, B);
- }
-};
-
-//===---------------------------------------===//
-// 'strchr' Optimizations
-
-struct StrChrOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strchr" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- !FT->getParamType(1)->isIntegerTy(32))
- return 0;
-
- Value *SrcStr = CI->getArgOperand(0);
-
- // If the second operand is non-constant, see if we can compute the length
- // of the input string and turn this into memchr.
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- if (CharC == 0) {
- // These optimizations require TargetData.
- if (!TD) return 0;
-
- uint64_t Len = GetStringLength(SrcStr);
- if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
- return 0;
-
- return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- B, TD, TLI);
- }
-
- // Otherwise, the character is a constant, see if the first argument is
- // a string literal. If so, we can constant fold.
- StringRef Str;
- if (!getConstantStringInfo(SrcStr, Str))
- return 0;
-
- // Compute the offset, make sure to handle the case when we're searching for
- // zero (a weird way to spell strlen).
- size_t I = CharC->getSExtValue() == 0 ?
- Str.size() : Str.find(CharC->getSExtValue());
- if (I == StringRef::npos) // Didn't find the char. strchr returns null.
- return Constant::getNullValue(CI->getType());
-
- // strchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
- }
-};
-
-//===---------------------------------------===//
-// 'strrchr' Optimizations
-
-struct StrRChrOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strrchr" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- !FT->getParamType(1)->isIntegerTy(32))
- return 0;
-
- Value *SrcStr = CI->getArgOperand(0);
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
-
- // Cannot fold anything if we're not looking for a constant.
- if (!CharC)
- return 0;
-
- StringRef Str;
- if (!getConstantStringInfo(SrcStr, Str)) {
- // strrchr(s, 0) -> strchr(s, 0)
- if (TD && CharC->isZero())
- return EmitStrChr(SrcStr, '\0', B, TD, TLI);
- return 0;
- }
-
- // Compute the offset.
- size_t I = CharC->getSExtValue() == 0 ?
- Str.size() : Str.rfind(CharC->getSExtValue());
- if (I == StringRef::npos) // Didn't find the char. Return null.
- return Constant::getNullValue(CI->getType());
-
- // strrchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
- }
-};
-
-//===---------------------------------------===//
-// 'strcmp' Optimizations
-
-struct StrCmpOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strcmp" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- !FT->getReturnType()->isIntegerTy(32) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy())
- return 0;
-
- Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
- if (Str1P == Str2P) // strcmp(x,x) -> 0
- return ConstantInt::get(CI->getType(), 0);
-
- StringRef Str1, Str2;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1);
- bool HasStr2 = getConstantStringInfo(Str2P, Str2);
-
- // strcmp(x, y) -> cnst (if both x and y are constant strings)
- if (HasStr1 && HasStr2)
- return ConstantInt::get(CI->getType(), Str1.compare(Str2));
-
- if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
- return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
- CI->getType()));
-
- if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
- // strcmp(P, "x") -> memcmp(P, "x", 2)
- uint64_t Len1 = GetStringLength(Str1P);
- uint64_t Len2 = GetStringLength(Str2P);
- if (Len1 && Len2) {
- // These optimizations require TargetData.
- if (!TD) return 0;
-
- return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(TD->getIntPtrType(*Context),
- std::min(Len1, Len2)), B, TD, TLI);
- }
-
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// 'strncmp' Optimizations
-
-struct StrNCmpOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strncmp" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 ||
- !FT->getReturnType()->isIntegerTy(32) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- !FT->getParamType(2)->isIntegerTy())
- return 0;
-
- Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
- if (Str1P == Str2P) // strncmp(x,x,n) -> 0
- return ConstantInt::get(CI->getType(), 0);
-
- // Get the length argument if it is constant.
- uint64_t Length;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
- Length = LengthArg->getZExtValue();
- else
- return 0;
-
- if (Length == 0) // strncmp(x,y,0) -> 0
- return ConstantInt::get(CI->getType(), 0);
-
- if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
-
- StringRef Str1, Str2;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1);
- bool HasStr2 = getConstantStringInfo(Str2P, Str2);
-
- // strncmp(x, y) -> cnst (if both x and y are constant strings)
- if (HasStr1 && HasStr2) {
- StringRef SubStr1 = Str1.substr(0, Length);
- StringRef SubStr2 = Str2.substr(0, Length);
- return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
- }
-
- if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
- return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
- CI->getType()));
-
- if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
- return 0;
- }
-};
-
-
-//===---------------------------------------===//
-// 'strcpy' Optimizations
-
-struct StrCpyOpt : public LibCallOptimization {
- bool OptChkCall; // True if it's optimizing a __strcpy_chk libcall.
-
- StrCpyOpt(bool c) : OptChkCall(c) {}
-
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strcpy" function prototype.
- unsigned NumParams = OptChkCall ? 3 : 2;
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != NumParams ||
- FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy())
- return 0;
-
- Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) // strcpy(x,x) -> x
- return Src;
-
- // These optimizations require TargetData.
- if (!TD) return 0;
-
- // See if we can get the length of the input string.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
-
- // We have enough information to now generate the memcpy call to do the
- // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- if (!OptChkCall ||
- !EmitMemCpyChk(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- CI->getArgOperand(2), B, TD, TLI))
- B.CreateMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
- return Dst;
- }
-};
-
-//===---------------------------------------===//
-// 'stpcpy' Optimizations
-
-struct StpCpyOpt: public LibCallOptimization {
- bool OptChkCall; // True if it's optimizing a __stpcpy_chk libcall.
-
- StpCpyOpt(bool c) : OptChkCall(c) {}
-
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "stpcpy" function prototype.
- unsigned NumParams = OptChkCall ? 3 : 2;
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != NumParams ||
- FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy())
- return 0;
-
- // These optimizations require TargetData.
- if (!TD) return 0;
-
- Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
- Value *StrLen = EmitStrLen(Src, B, TD, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
- }
-
- // See if we can get the length of the input string.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
-
- Value *LenV = ConstantInt::get(TD->getIntPtrType(*Context), Len);
- Value *DstEnd = B.CreateGEP(Dst,
- ConstantInt::get(TD->getIntPtrType(*Context),
- Len - 1));
-
- // We have enough information to now generate the memcpy call to do the
- // copy for us. Make a memcpy to copy the nul byte with align = 1.
- if (!OptChkCall || !EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B,
- TD, TLI))
- B.CreateMemCpy(Dst, Src, LenV, 1);
- return DstEnd;
- }
-};
-
-//===---------------------------------------===//
-// 'strncpy' Optimizations
-
-struct StrNCpyOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- !FT->getParamType(2)->isIntegerTy())
- return 0;
-
- Value *Dst = CI->getArgOperand(0);
- Value *Src = CI->getArgOperand(1);
- Value *LenOp = CI->getArgOperand(2);
-
- // See if we can get the length of the input string.
- uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0) return 0;
- --SrcLen;
-
- if (SrcLen == 0) {
- // strncpy(x, "", y) -> memset(x, '\0', y, 1)
- B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
- return Dst;
- }
-
- uint64_t Len;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
- Len = LengthArg->getZExtValue();
- else
- return 0;
-
- if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
-
- // These optimizations require TargetData.
- if (!TD) return 0;
-
- // Let strncpy handle the zero padding
- if (Len > SrcLen+1) return 0;
-
- // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
- B.CreateMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
-
- return Dst;
- }
-};
-
-//===---------------------------------------===//
-// 'strlen' Optimizations
-
-struct StrLenOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 1 ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- !FT->getReturnType()->isIntegerTy())
- return 0;
-
- Value *Src = CI->getArgOperand(0);
-
- // Constant folding: strlen("xyz") -> 3
- if (uint64_t Len = GetStringLength(Src))
- return ConstantInt::get(CI->getType(), Len-1);
-
- // strlen(x) != 0 --> *x != 0
- // strlen(x) == 0 --> *x == 0
- if (IsOnlyUsedInZeroEqualityComparison(CI))
- return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
- return 0;
- }
-};
-
-
-//===---------------------------------------===//
-// 'strpbrk' Optimizations
-
-struct StrPBrkOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- FT->getParamType(1) != FT->getParamType(0) ||
- FT->getReturnType() != FT->getParamType(0))
- return 0;
-
- StringRef S1, S2;
- bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
-
- // strpbrk(s, "") -> NULL
- // strpbrk("", s) -> NULL
- if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
- return Constant::getNullValue(CI->getType());
-
- // Constant folding.
- if (HasS1 && HasS2) {
- size_t I = S1.find_first_of(S2);
- if (I == std::string::npos) // No match.
- return Constant::getNullValue(CI->getType());
-
- return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
- }
-
- // strpbrk(s, "a") -> strchr(s, 'a')
- if (TD && HasS2 && S2.size() == 1)
- return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI);
-
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc.
-
-struct StrToOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy())
- return 0;
-
- Value *EndPtr = CI->getArgOperand(1);
- if (isa<ConstantPointerNull>(EndPtr)) {
- // With a null EndPtr, this function won't capture the main argument.
- // It would be readonly too, except that it still may write to errno.
- CI->addAttribute(1, Attribute::NoCapture);
- }
-
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// 'strspn' Optimizations
-
-struct StrSpnOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- FT->getParamType(1) != FT->getParamType(0) ||
- !FT->getReturnType()->isIntegerTy())
- return 0;
-
- StringRef S1, S2;
- bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
-
- // strspn(s, "") -> 0
- // strspn("", s) -> 0
- if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
- return Constant::getNullValue(CI->getType());
-
- // Constant folding.
- if (HasS1 && HasS2) {
- size_t Pos = S1.find_first_not_of(S2);
- if (Pos == StringRef::npos) Pos = S1.size();
- return ConstantInt::get(CI->getType(), Pos);
- }
-
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// 'strcspn' Optimizations
-
-struct StrCSpnOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- FT->getParamType(1) != FT->getParamType(0) ||
- !FT->getReturnType()->isIntegerTy())
- return 0;
-
- StringRef S1, S2;
- bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
-
- // strcspn("", s) -> 0
- if (HasS1 && S1.empty())
- return Constant::getNullValue(CI->getType());
-
- // Constant folding.
- if (HasS1 && HasS2) {
- size_t Pos = S1.find_first_of(S2);
- if (Pos == StringRef::npos) Pos = S1.size();
- return ConstantInt::get(CI->getType(), Pos);
- }
-
- // strcspn(s, "") -> strlen(s)
- if (TD && HasS2 && S2.empty())
- return EmitStrLen(CI->getArgOperand(0), B, TD, TLI);
-
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// 'strstr' Optimizations
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
-struct StrStrOpt : public LibCallOptimization {
+struct UnaryDoubleFPOpt : public LibCallOptimization {
+ bool CheckRetType;
+ UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isPointerTy())
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+ !FT->getParamType(0)->isDoubleTy())
return 0;
- // fold strstr(x, x) -> x.
- if (CI->getArgOperand(0) == CI->getArgOperand(1))
- return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
-
- // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
- if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
- Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI);
- if (!StrLen)
- return 0;
- Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
- StrLen, B, TD, TLI);
- if (!StrNCmp)
- return 0;
- for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
- UI != UE; ) {
- ICmpInst *Old = cast<ICmpInst>(*UI++);
- Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
- ConstantInt::getNullValue(StrNCmp->getType()),
- "cmp");
- Old->replaceAllUsesWith(Cmp);
- Old->eraseFromParent();
+ if (CheckRetType) {
+ // Check if all the uses of a function like 'sin' are converted to float.
+ for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end();
+ ++UseI) {
+ FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI);
+ if (Cast == 0 || !Cast->getType()->isFloatTy())
+ return 0;
}
- return CI;
- }
-
- // See if either input string is a constant string.
- StringRef SearchStr, ToFindStr;
- bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
- bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
-
- // fold strstr(x, "") -> x.
- if (HasStr2 && ToFindStr.empty())
- return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
-
- // If both strings are known, constant fold it.
- if (HasStr1 && HasStr2) {
- std::string::size_type Offset = SearchStr.find(ToFindStr);
-
- if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
- return Constant::getNullValue(CI->getType());
-
- // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
- Value *Result = CastToCStr(CI->getArgOperand(0), B);
- Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
- return B.CreateBitCast(Result, CI->getType());
- }
-
- // fold strstr(x, "y") -> strchr(x, 'y').
- if (HasStr2 && ToFindStr.size() == 1) {
- Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI);
- return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0;
}
- return 0;
- }
-};
-
-
-//===---------------------------------------===//
-// 'memcmp' Optimizations
-
-struct MemCmpOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy(32))
- return 0;
- Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
-
- if (LHS == RHS) // memcmp(s,s,x) -> 0
- return Constant::getNullValue(CI->getType());
-
- // Make sure we have a constant length.
- ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
- if (!LenC) return 0;
- uint64_t Len = LenC->getZExtValue();
-
- if (Len == 0) // memcmp(s1,s2,0) -> 0
- return Constant::getNullValue(CI->getType());
-
- // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
- if (Len == 1) {
- Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"),
- CI->getType(), "lhsv");
- Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"),
- CI->getType(), "rhsv");
- return B.CreateSub(LHSV, RHSV, "chardiff");
- }
-
- // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
- StringRef LHSStr, RHSStr;
- if (getConstantStringInfo(LHS, LHSStr) &&
- getConstantStringInfo(RHS, RHSStr)) {
- // Make sure we're not reading out-of-bounds memory.
- if (Len > LHSStr.size() || Len > RHSStr.size())
- return 0;
- uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len);
- return ConstantInt::get(CI->getType(), Ret);
- }
-
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// 'memcpy' Optimizations
-
-struct MemCpyOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // These optimizations require TargetData.
- if (!TD) return 0;
-
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
- return 0;
-
- // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
- B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1);
- return CI->getArgOperand(0);
- }
-};
-
-//===---------------------------------------===//
-// 'memmove' Optimizations
-
-struct MemMoveOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // These optimizations require TargetData.
- if (!TD) return 0;
-
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
- return 0;
-
- // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
- B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1);
- return CI->getArgOperand(0);
- }
-};
-
-//===---------------------------------------===//
-// 'memset' Optimizations
-
-struct MemSetOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // These optimizations require TargetData.
- if (!TD) return 0;
-
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ // If this is something like 'floor((double)floatval)', convert to floorf.
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+ if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
return 0;
- // memset(p, v, n) -> llvm.memset(p, v, n, 1)
- Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
- B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
- return CI->getArgOperand(0);
+ // floor((double)floatval) -> (double)floorf(floatval)
+ Value *V = Cast->getOperand(0);
+ V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
+ return B.CreateFPExt(V, B.getDoubleTy());
}
};
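
In source terms, the pattern this optimization matches is a float widened to double, passed through a double-precision unary libm call, and, in the CheckRetType case, truncated straight back to float. A rough before/after illustration, not taken from the patch itself:

    #include <math.h>

    // Exact case: floor of a widened float; always legal to shrink.
    double floor_before(float x) { return floor((double)x); }
    double floor_after(float x)  { return (double)floorf(x); }

    // Inexact case (gated by -enable-double-float-shrink): sin is shrunk only
    // when every use of the double result is immediately truncated to float.
    float sin_before(float x) { return (float)sin((double)x); }
    float sin_after(float x)  { return sinf(x); }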
-//===----------------------------------------------------------------------===//
-// Math Library Optimizations
-//===----------------------------------------------------------------------===//
-
//===---------------------------------------===//
// 'cos*' Optimizations
-
struct CosOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "cos" &&
+ TLI->has(LibFunc::cosf)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
+ }
+
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 1 argument of FP type, which matches the
// result type.
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isFloatingPointTy())
- return 0;
+ return Ret;
// cos(-x) -> cos(x)
Value *Op1 = CI->getArgOperand(0);
@@ -910,7 +162,7 @@ struct CosOpt : public LibCallOptimization {
BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
}
- return 0;
+ return Ret;
}
};
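
The surviving body relies on cosine being an even function; a minimal source-level view of the rewrite:

    #include <math.h>
    double cos_before(double x) { return cos(-x); }
    double cos_after(double x)  { return cos(x); }   // cos(-x) == cos(x), negation dropped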
@@ -919,13 +171,20 @@ struct CosOpt : public LibCallOptimization {
struct PowOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "pow" &&
+ TLI->has(LibFunc::powf)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
+ }
+
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 2 arguments of the same FP type, which match the
// result type.
if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
!FT->getParamType(0)->isFloatingPointTy())
- return 0;
+ return Ret;
Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
@@ -936,7 +195,7 @@ struct PowOpt : public LibCallOptimization {
}
ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
- if (Op2C == 0) return 0;
+ if (Op2C == 0) return Ret;
if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
return ConstantFP::get(CI->getType(), 1.0);
@@ -974,12 +233,19 @@ struct PowOpt : public LibCallOptimization {
struct Exp2Opt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "exp2" &&
+ TLI->has(LibFunc::exp2)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
+ }
+
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 1 argument of FP type, which matches the
// result type.
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isFloatingPointTy())
- return 0;
+ return Ret;
Value *Op = CI->getArgOperand(0);
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
@@ -1016,29 +282,7 @@ struct Exp2Opt : public LibCallOptimization {
return CI;
}
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
-
-struct UnaryDoubleFPOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
- !FT->getParamType(0)->isDoubleTy())
- return 0;
-
- // If this is something like 'floor((double)floatval)', convert to floorf.
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
- if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
- return 0;
-
- // floor((double)floatval) -> (double)floorf(floatval)
- Value *V = Cast->getOperand(0);
- V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
- return B.CreateFPExt(V, B.getDoubleTy());
+ return Ret;
}
};
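
The exp2 rewrite referred to in the hunk above turns a power of two computed through the exponential into a plain scale by 2^x. A stand-alone sketch of the same identity (not the pass's IR-building code):

    #include <math.h>
    double exp2_before(int x) { return exp2((double)x); }
    double exp2_after(int x)  { return ldexp(1.0, x); }   // 1.0 * 2^x via ldexp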
@@ -1063,8 +307,8 @@ struct FFSOpt : public LibCallOptimization {
// Constant fold.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
- if (CI->getValue() == 0) // ffs(0) -> 0.
- return Constant::getNullValue(CI->getType());
+ if (CI->isZero()) // ffs(0) -> 0.
+ return B.getInt32(0);
// ffs(c) -> cttz(c)+1
return B.getInt32(CI->getValue().countTrailingZeros() + 1);
}
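
The updated constant fold keeps the usual ffs/cttz identity and now materializes the zero case as an i32 as well. A hypothetical helper expressing the same relationship with a GCC/Clang-style builtin:

    // ffs(c) is 1 + the index of the lowest set bit, or 0 when c == 0.
    int ffs_via_cttz(unsigned c) {
      return c ? __builtin_ctz(c) + 1 : 0;
    }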
@@ -1267,7 +511,7 @@ struct SPrintFOpt : public LibCallOptimization {
if (FormatStr[i] == '%')
return 0; // we found a format specifier, bail out.
- // These optimizations require TargetData.
+ // These optimizations require DataLayout.
if (!TD) return 0;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
@@ -1297,7 +541,7 @@ struct SPrintFOpt : public LibCallOptimization {
}
if (FormatStr[1] == 's') {
- // These optimizations require TargetData.
+ // These optimizations require DataLayout.
if (!TD) return 0;
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
@@ -1385,7 +629,7 @@ struct FWriteOpt : public LibCallOptimization {
struct FPutsOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // These optimizations require TargetData.
+ // These optimizations require DataLayout.
if (!TD) return 0;
// Require two pointers. Also, we can't optimize if return value is used.
@@ -1422,7 +666,7 @@ struct FPrintFOpt : public LibCallOptimization {
if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
return 0; // We found a format specifier.
- // These optimizations require TargetData.
+ // These optimizations require DataLayout.
if (!TD) return 0;
Value *NewCI = EmitFWrite(CI->getArgOperand(1),
@@ -1524,17 +768,9 @@ namespace {
TargetLibraryInfo *TLI;
StringMap<LibCallOptimization*> Optimizations;
- // String and Memory LibCall Optimizations
- StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
- StrCmpOpt StrCmp; StrNCmpOpt StrNCmp;
- StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
- StpCpyOpt StpCpy; StpCpyOpt StpCpyChk;
- StrNCpyOpt StrNCpy;
- StrLenOpt StrLen; StrPBrkOpt StrPBrk;
- StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
- MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
// Math Library Optimizations
- CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
+ CosOpt Cos; PowOpt Pow; Exp2Opt Exp2;
+ UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP;
// Integer Optimizations
FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
ToAsciiOpt ToAscii;
@@ -1546,11 +782,13 @@ namespace {
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
- SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true),
- StpCpy(false), StpCpyChk(true) {
+ SimplifyLibCalls() : FunctionPass(ID), UnaryDoubleFP(false),
+ UnsafeUnaryDoubleFP(true) {
initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
void AddOpt(LibFunc::Func F, LibCallOptimization* Opt);
+ void AddOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt);
+
void InitOptimizations();
bool runOnFunction(Function &F);
@@ -1586,40 +824,15 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F, LibCallOptimization* Opt) {
Optimizations[TLI->getName(F)] = Opt;
}
+void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
+ LibCallOptimization* Opt) {
+ if (TLI->has(F1) && TLI->has(F2))
+ Optimizations[TLI->getName(F1)] = Opt;
+}
+
/// Optimizations - Populate the Optimizations map with all the optimizations
/// we know.
void SimplifyLibCalls::InitOptimizations() {
- // String and Memory LibCall Optimizations
- Optimizations["strcat"] = &StrCat;
- Optimizations["strncat"] = &StrNCat;
- Optimizations["strchr"] = &StrChr;
- Optimizations["strrchr"] = &StrRChr;
- Optimizations["strcmp"] = &StrCmp;
- Optimizations["strncmp"] = &StrNCmp;
- Optimizations["strcpy"] = &StrCpy;
- Optimizations["strncpy"] = &StrNCpy;
- Optimizations["stpcpy"] = &StpCpy;
- Optimizations["strlen"] = &StrLen;
- Optimizations["strpbrk"] = &StrPBrk;
- Optimizations["strtol"] = &StrTo;
- Optimizations["strtod"] = &StrTo;
- Optimizations["strtof"] = &StrTo;
- Optimizations["strtoul"] = &StrTo;
- Optimizations["strtoll"] = &StrTo;
- Optimizations["strtold"] = &StrTo;
- Optimizations["strtoull"] = &StrTo;
- Optimizations["strspn"] = &StrSpn;
- Optimizations["strcspn"] = &StrCSpn;
- Optimizations["strstr"] = &StrStr;
- Optimizations["memcmp"] = &MemCmp;
- AddOpt(LibFunc::memcpy, &MemCpy);
- Optimizations["memmove"] = &MemMove;
- AddOpt(LibFunc::memset, &MemSet);
-
- // _chk variants of String and Memory LibCall Optimizations.
- Optimizations["__strcpy_chk"] = &StrCpyChk;
- Optimizations["__stpcpy_chk"] = &StpCpyChk;
-
// Math Library Optimizations
Optimizations["cosf"] = &Cos;
Optimizations["cos"] = &Cos;
@@ -1641,16 +854,37 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["llvm.exp2.f64"] = &Exp2;
Optimizations["llvm.exp2.f32"] = &Exp2;
- if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf))
- Optimizations["floor"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf))
- Optimizations["ceil"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::round) && TLI->has(LibFunc::roundf))
- Optimizations["round"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::rint) && TLI->has(LibFunc::rintf))
- Optimizations["rint"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf))
- Optimizations["nearbyint"] = &UnaryDoubleFP;
+ AddOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP);
+ AddOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP);
+ AddOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP);
+ AddOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP);
+ AddOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP);
+ AddOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP);
+ AddOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP);
+
+ if (UnsafeFPShrink) {
+ AddOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP);
+ }
// Integer Optimizations
Optimizations["ffs"] = &FFS;
@@ -1681,7 +915,7 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
if (Optimizations.empty())
InitOptimizations();
- const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
IRBuilder<> Builder(F.getContext());
diff --git a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
index d831452..6815e41 100644
--- a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -16,7 +16,7 @@
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
@@ -55,10 +55,12 @@ void ExtAddrMode::print(raw_ostream &OS) const {
OS << ']';
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ExtAddrMode::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
@@ -219,7 +221,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
unsigned VariableScale = 0;
int64_t ConstantOffset = 0;
- const TargetData *TD = TLI.getTargetData();
+ const DataLayout *TD = TLI.getDataLayout();
gep_type_iterator GTI = gep_type_begin(AddrInst);
for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 2679b93..9fea113 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -22,7 +22,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/ErrorHandling.h"
@@ -94,7 +94,7 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
/// is dead. Also recursively delete any operands that become dead as
/// a result. This includes tracing the def-use list from the PHI to see if
/// it is ultimately unused or if it reaches an unused cycle.
-bool llvm::DeleteDeadPHIs(BasicBlock *BB) {
+bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
// Recursively deleting a PHI may cause multiple PHIs to be deleted
// or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
SmallVector<WeakVH, 8> PHIs;
@@ -105,7 +105,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB) {
bool Changed = false;
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
- Changed |= RecursivelyDeleteDeadPHINode(PN);
+ Changed |= RecursivelyDeleteDeadPHINode(PN, TLI);
return Changed;
}
@@ -687,3 +687,42 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
return cast<ReturnInst>(NewRet);
}
+/// SplitBlockAndInsertIfThen - Split the containing block at the
+/// specified instruction - everything before and including Cmp stays
+/// in the old basic block, and everything after Cmp is moved to a
+/// new block. The two blocks are connected by a conditional branch
+/// (with value of Cmp being the condition).
+/// Before:
+/// Head
+/// Cmp
+/// Tail
+/// After:
+/// Head
+/// Cmp
+/// if (Cmp)
+/// ThenBlock
+/// Tail
+///
+/// If Unreachable is true, then ThenBlock ends with
+/// UnreachableInst, otherwise it branches to Tail.
+/// Returns the NewBasicBlock's terminator.
+
+TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp,
+ bool Unreachable, MDNode *BranchWeights) {
+ Instruction *SplitBefore = Cmp->getNextNode();
+ BasicBlock *Head = SplitBefore->getParent();
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
+ TerminatorInst *HeadOldTerm = Head->getTerminator();
+ LLVMContext &C = Head->getContext();
+ BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ TerminatorInst *CheckTerm;
+ if (Unreachable)
+ CheckTerm = new UnreachableInst(C, ThenBlock);
+ else
+ CheckTerm = BranchInst::Create(Tail, ThenBlock);
+ BranchInst *HeadNewTerm =
+ BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
+ HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
+ ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
+ return CheckTerm;
+}
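
A hedged caller-side sketch of how the new helper might be used to guard an instrumentation slow path; Cmp, ReportFn and Addr are assumed to exist already and are not part of this patch:

    // Split right after Cmp; the returned terminator belongs to the new 'then'
    // block, which falls through to the original tail because Unreachable is false.
    TerminatorInst *ThenTerm =
        SplitBlockAndInsertIfThen(Cmp, /*Unreachable=*/false, /*BranchWeights=*/0);
    IRBuilder<> IRB(ThenTerm);        // insert before the then-block terminator
    IRB.CreateCall(ReportFn, Addr);   // executed only when Cmp evaluates to true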
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index e13fd71..74b2ee1 100644
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -22,7 +22,7 @@
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
@@ -34,19 +34,22 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
/// EmitStrLen - Emit a call to the strlen function to the builder, for the
/// specified pointer. This always returns an integer value of size intptr_t.
-Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD,
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strlen))
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
- Attribute::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI),
+ Constant *StrLen = M->getOrInsertFunction("strlen",
+ AttrListPtr::get(M->getContext(),
+ AWI),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
NULL);
@@ -61,18 +64,21 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD,
/// specified pointer. Ptr is required to be some pointer type, MaxLen must
/// be of size_t type, and the return value has 'intptr_t' type.
Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
- const TargetData *TD, const TargetLibraryInfo *TLI) {
+ const DataLayout *TD, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strnlen))
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
- Attribute::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI),
+ Constant *StrNLen = M->getOrInsertFunction("strnlen",
+ AttrListPtr::get(M->getContext(),
+ AWI),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
@@ -88,17 +94,21 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
/// specified pointer and character. Ptr is required to be some pointer type,
/// and the return value has 'i8*' type.
Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
- const TargetData *TD, const TargetLibraryInfo *TLI) {
+ const DataLayout *TD, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strchr))
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
AttributeWithIndex AWI =
- AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+ AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
Type *I8Ptr = B.getInt8PtrTy();
Type *I32Ty = B.getInt32Ty();
- Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(AWI),
+ Constant *StrChr = M->getOrInsertFunction("strchr",
+ AttrListPtr::get(M->getContext(),
+ AWI),
I8Ptr, I8Ptr, I32Ty, NULL);
CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
ConstantInt::get(I32Ty, C), "strchr");
@@ -109,20 +119,23 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
/// EmitStrNCmp - Emit a call to the strncmp function to the builder.
Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
- IRBuilder<> &B, const TargetData *TD,
+ IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strncmp))
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
- AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
- Attribute::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+ Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
+ AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI),
+ Value *StrNCmp = M->getOrInsertFunction("strncmp",
+ AttrListPtr::get(M->getContext(),
+ AWI),
B.getInt32Ty(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
@@ -139,17 +152,19 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
/// specified pointer arguments.
Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
- const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DataLayout *TD, const TargetLibraryInfo *TLI,
StringRef Name) {
if (!TLI->has(LibFunc::strcpy))
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
Type *I8Ptr = B.getInt8PtrTy();
- Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI),
+ Value *StrCpy = M->getOrInsertFunction(Name,
+ AttrListPtr::get(M->getContext(), AWI),
I8Ptr, I8Ptr, I8Ptr, NULL);
CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
Name);
@@ -161,17 +176,20 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
/// specified pointer arguments.
Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
- IRBuilder<> &B, const TargetData *TD,
+ IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI, StringRef Name) {
if (!TLI->has(LibFunc::strncpy))
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
Type *I8Ptr = B.getInt8PtrTy();
- Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI),
+ Value *StrNCpy = M->getOrInsertFunction(Name,
+ AttrListPtr::get(M->getContext(),
+ AWI),
I8Ptr, I8Ptr, I8Ptr,
Len->getType(), NULL);
CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
@@ -185,17 +203,18 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
/// are pointers.
Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
- IRBuilder<> &B, const TargetData *TD,
+ IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memcpy_chk))
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI;
- AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
- AttrListPtr::get(AWI),
+ AttrListPtr::get(M->getContext(), AWI),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
@@ -212,16 +231,19 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
- Value *Len, IRBuilder<> &B, const TargetData *TD,
+ Value *Len, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memchr))
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI;
- AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+ Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
+ AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI),
+ Value *MemChr = M->getOrInsertFunction("memchr",
+ AttrListPtr::get(M->getContext(), AWI),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
B.getInt32Ty(),
@@ -237,20 +259,22 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
/// EmitMemCmp - Emit a call to the memcmp function.
Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
- Value *Len, IRBuilder<> &B, const TargetData *TD,
+ Value *Len, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memcmp))
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
- AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
- Attribute::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+ Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
+ AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI),
+ Value *MemCmp = M->getOrInsertFunction("memcmp",
+ AttrListPtr::get(M->getContext(), AWI),
B.getInt32Ty(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
@@ -294,7 +318,7 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
-Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD,
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::putchar))
return 0;
@@ -316,17 +340,19 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD,
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
/// some pointer.
-Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD,
+Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::puts))
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
- Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI),
+ Value *PutS = M->getOrInsertFunction("puts",
+ AttrListPtr::get(M->getContext(), AWI),
B.getInt32Ty(),
B.getInt8PtrTy(),
NULL);
@@ -339,17 +365,19 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD,
/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
/// an integer and File is a pointer to FILE.
Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
- const TargetData *TD, const TargetLibraryInfo *TLI) {
+ const DataLayout *TD, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fputc))
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI),
+ F = M->getOrInsertFunction("fputc",
+ AttrListPtr::get(M->getContext(), AWI),
B.getInt32Ty(),
B.getInt32Ty(), File->getType(),
NULL);
@@ -370,19 +398,21 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
/// EmitFPutS - Emit a call to the puts function. Str is required to be a
/// pointer and File is a pointer to FILE.
Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
- const TargetData *TD, const TargetLibraryInfo *TLI) {
+ const DataLayout *TD, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fputs))
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
- AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+ AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
StringRef FPutsName = TLI->getName(LibFunc::fputs);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction(FPutsName, AttrListPtr::get(AWI),
+ F = M->getOrInsertFunction(FPutsName,
+ AttrListPtr::get(M->getContext(), AWI),
B.getInt32Ty(),
B.getInt8PtrTy(),
File->getType(), NULL);
@@ -400,21 +430,23 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
- IRBuilder<> &B, const TargetData *TD,
+ IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fwrite))
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture);
- AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+ AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture);
+ AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+ Attributes::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FWriteName = TLI->getName(LibFunc::fwrite);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI),
+ F = M->getOrInsertFunction(FWriteName,
+ AttrListPtr::get(M->getContext(), AWI),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
@@ -436,9 +468,9 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { }
-bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD,
+bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
- // We really need TargetData for later.
+ // We really need DataLayout for later.
if (!TD) return false;
this->CI = CI;
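The hunks above all apply the same mechanical migration of the attributes API:
the magic index ~0u becomes the named AttrListPtr::FunctionIndex, and both
AttributeWithIndex::get and AttrListPtr::get now take the LLVMContext
explicitly. A minimal sketch of the before/after pattern, using only calls that
appear in the hunks above (M is the Module and B the IRBuilder<>, as in the
surrounding functions):

    // Old style: index ~0u meant "function attributes"; no context argument.
    //   AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
    //   Fn  = M->getOrInsertFunction("puts", AttrListPtr::get(AWI), ...);

    // New style: explicit context and a named function index.
    AttributeWithIndex AWI =
      AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
                              Attributes::NoUnwind);
    Constant *Fn =
      M->getOrInsertFunction("puts", AttrListPtr::get(M->getContext(), AWI),
                             B.getInt32Ty(), B.getInt8PtrTy(), NULL);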
diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
new file mode 100644
index 0000000..bee2f7b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -0,0 +1,262 @@
+//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an optimization for div and rem on architectures that
+// execute short instructions significantly faster than longer instructions.
+// For example, on Intel Atom, 32-bit divides are slow enough that it is
+// profitable at run time to check the values of the operands and, if both are
+// positive and less than 256, use an unsigned 8-bit divide instead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "bypass-slow-division"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+
+using namespace llvm;
+
+namespace {
+ struct DivOpInfo {
+ bool SignedOp;
+ Value *Dividend;
+ Value *Divisor;
+
+ DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
+ : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
+ };
+
+ struct DivPhiNodes {
+ PHINode *Quotient;
+ PHINode *Remainder;
+
+ DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
+ : Quotient(InQuotient), Remainder(InRemainder) {}
+ };
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<DivOpInfo> {
+ static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
+ return Val1.SignedOp == Val2.SignedOp &&
+ Val1.Dividend == Val2.Dividend &&
+ Val1.Divisor == Val2.Divisor;
+ }
+
+ static DivOpInfo getEmptyKey() {
+ return DivOpInfo(false, 0, 0);
+ }
+
+ static DivOpInfo getTombstoneKey() {
+ return DivOpInfo(true, 0, 0);
+ }
+
+ static unsigned getHashValue(const DivOpInfo &Val) {
+ return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
+ reinterpret_cast<uintptr_t>(Val.Divisor)) ^
+ (unsigned)Val.SignedOp;
+ }
+ };
+
+ typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
+}
+
+// insertFastDiv - Substitutes the div/rem instruction with code that checks the
+// value of the operands and uses a shorter, faster div/rem instruction when
+// possible, and the longer, slower div/rem instruction otherwise.
+static bool insertFastDiv(Function &F,
+ Function::iterator &I,
+ BasicBlock::iterator &J,
+ IntegerType *BypassType,
+ bool UseDivOp,
+ bool UseSignedOp,
+ DivCacheTy &PerBBDivCache) {
+ // Get instruction operands
+ Instruction *Instr = J;
+ Value *Dividend = Instr->getOperand(0);
+ Value *Divisor = Instr->getOperand(1);
+
+ if (isa<ConstantInt>(Divisor) ||
+ (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
+ // Operations with immediate values should have
+ // been solved and replaced during compile time.
+ return false;
+ }
+
+ // Basic Block is split before divide
+ BasicBlock *MainBB = I;
+ BasicBlock *SuccessorBB = I->splitBasicBlock(J);
+ ++I; //advance iterator I to successorBB
+
+ // Add new basic block for slow divide operation
+ BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ SlowBB->moveBefore(SuccessorBB);
+ IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
+ Value *SlowQuotientV;
+ Value *SlowRemainderV;
+ if (UseSignedOp) {
+ SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
+ SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
+ } else {
+ SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
+ SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
+ }
+ SlowBuilder.CreateBr(SuccessorBB);
+
+ // Add new basic block for fast divide operation
+ BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ FastBB->moveBefore(SlowBB);
+ IRBuilder<> FastBuilder(FastBB, FastBB->begin());
+ Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
+ BypassType);
+ Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
+ BypassType);
+
+ // udiv/urem because optimization only handles positive numbers
+ Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
+ ShortDivisorV);
+ Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
+ ShortDivisorV);
+ Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
+ ShortQuotientV,
+ Dividend->getType());
+ Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
+ ShortRemainderV,
+ Dividend->getType());
+ FastBuilder.CreateBr(SuccessorBB);
+
+ // Phi nodes for result of div and rem
+ IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
+ PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+ QuoPhi->addIncoming(SlowQuotientV, SlowBB);
+ QuoPhi->addIncoming(FastQuotientV, FastBB);
+ PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+ RemPhi->addIncoming(SlowRemainderV, SlowBB);
+ RemPhi->addIncoming(FastRemainderV, FastBB);
+
+ // Replace Instr with appropriate phi node
+ if (UseDivOp)
+ Instr->replaceAllUsesWith(QuoPhi);
+ else
+ Instr->replaceAllUsesWith(RemPhi);
+ Instr->eraseFromParent();
+
+ // Combine operands into a single value with OR for value testing below
+ MainBB->getInstList().back().eraseFromParent();
+ IRBuilder<> MainBuilder(MainBB, MainBB->end());
+ Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
+
+ // BitMask is inverted to check if the operands are
+ // larger than the bypass type
+ uint64_t BitMask = ~BypassType->getBitMask();
+ Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
+
+ // Compare operand values and branch
+ Value *ZeroV = MainBuilder.getInt32(0);
+ Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
+ MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
+
+ // point iterator J at first instruction of successorBB
+ J = I->begin();
+
+ // Cache phi nodes to be used later in place of other instances
+ // of div or rem with the same sign, dividend, and divisor
+ DivOpInfo Key(UseSignedOp, Dividend, Divisor);
+ DivPhiNodes Value(QuoPhi, RemPhi);
+ PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
+ return true;
+}
+
+// reuseOrInsertFastDiv - Reuses a previously computed quotient or remainder if
+// the operands and operation are identical. Otherwise calls insertFastDiv to
+// perform the optimization and caches the resulting quotient and remainder.
+static bool reuseOrInsertFastDiv(Function &F,
+ Function::iterator &I,
+ BasicBlock::iterator &J,
+ IntegerType *BypassType,
+ bool UseDivOp,
+ bool UseSignedOp,
+ DivCacheTy &PerBBDivCache) {
+ // Get instruction operands
+ Instruction *Instr = J;
+ DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
+ DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
+
+ if (CacheI == PerBBDivCache.end()) {
+ // If previous instance does not exist, insert fast div
+ return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
+ PerBBDivCache);
+ }
+
+ // Replace operation value with previously generated phi node
+ DivPhiNodes &Value = CacheI->second;
+ if (UseDivOp) {
+ // Replace all uses of div instruction with quotient phi node
+ J->replaceAllUsesWith(Value.Quotient);
+ } else {
+ // Replace all uses of rem instruction with remainder phi node
+ J->replaceAllUsesWith(Value.Remainder);
+ }
+
+ // Advance to next operation
+ ++J;
+
+ // Remove redundant operation
+ Instr->eraseFromParent();
+ return true;
+}
+
+// bypassSlowDivision - This optimization identifies div/rem instructions that
+// can be profitably bypassed and carried out with a shorter, faster divide.
+bool llvm::bypassSlowDivision(Function &F,
+ Function::iterator &I,
+ const DenseMap<unsigned int, unsigned int> &BypassWidths) {
+ DivCacheTy DivCache;
+
+ bool MadeChange = false;
+ for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
+
+ // Get instruction details
+ unsigned Opcode = J->getOpcode();
+ bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
+ bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
+ bool UseSignedOp = Opcode == Instruction::SDiv ||
+ Opcode == Instruction::SRem;
+
+ // Only optimize div or rem ops
+ if (!UseDivOp && !UseRemOp)
+ continue;
+
+ // Skip division on vector types, only optimize integer instructions
+ if (!J->getType()->isIntegerTy())
+ continue;
+
+ // Get bitwidth of div/rem instruction
+ IntegerType *T = cast<IntegerType>(J->getType());
+ int bitwidth = T->getBitWidth();
+
+ // Continue if bitwidth is not bypassed
+ DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
+ if (BI == BypassWidths.end())
+ continue;
+
+ // Get type for div/rem instruction with bypass bitwidth
+ IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
+
+ MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
+ UseSignedOp, DivCache);
+ }
+
+ return MadeChange;
+}
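The entry point above works on one basic block at a time and may split it. A
hypothetical per-function driver (not part of this patch; names invented) that
bypasses 32-bit divides with 8-bit ones, matching the Intel Atom example from
the file header, might look like:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/Function.h"
    #include "llvm/Transforms/Utils/BypassSlowDivision.h"
    using namespace llvm;

    static bool bypassDividesIn(Function &F) {
      // Handle i32 div/rem with i8 operations when both operands fit in 8 bits.
      DenseMap<unsigned int, unsigned int> BypassWidths;
      BypassWidths[32] = 8;

      bool Changed = false;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        Changed |= bypassSlowDivision(F, I, BypassWidths);
      return Changed;
    }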
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 99237b8..7ba9f6d 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -98,10 +98,14 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
Anew->addAttr( OldFunc->getAttributes()
.getParamAttributes(I->getArgNo() + 1));
NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttr(0, OldFunc->getAttributes()
+ .addAttr(NewFunc->getContext(),
+ AttrListPtr::ReturnIndex,
+ OldFunc->getAttributes()
.getRetAttributes()));
NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttr(~0, OldFunc->getAttributes()
+ .addAttr(NewFunc->getContext(),
+ AttrListPtr::FunctionIndex,
+ OldFunc->getAttributes()
.getFnAttributes()));
}
@@ -202,14 +206,14 @@ namespace {
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
- const TargetData *TD;
+ const DataLayout *TD;
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap,
bool moduleLevelChanges,
const char *nameSuffix,
ClonedCodeInfo *codeInfo,
- const TargetData *td)
+ const DataLayout *td)
: NewFunc(newFunc), OldFunc(oldFunc),
VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
@@ -365,7 +369,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
- const TargetData *TD,
+ const DataLayout *TD,
Instruction *TheCall) {
assert(NameSuffix && "NameSuffix cannot be null!");
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index c545cd6..281714f 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -346,7 +346,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
header->getName(), M);
// If the old function is no-throw, so is the new one.
if (oldFunction->doesNotThrow())
- newFunction->setDoesNotThrow(true);
+ newFunction->setDoesNotThrow();
newFunction->getBasicBlockList().push_back(newRootNode);
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 89e89e7..009847f 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -27,7 +27,7 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -357,7 +357,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
Type *VoidPtrTy = Type::getInt8PtrTy(Context);
- // Create the alloca. If we have TargetData, use nice alignment.
+ // Create the alloca. If we have DataLayout, use nice alignment.
unsigned Align = 1;
if (IFI.TD)
Align = IFI.TD->getPrefTypeAlignment(AggTy);
diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
new file mode 100644
index 0000000..55227e2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
@@ -0,0 +1,420 @@
+//===-- IntegerDivision.cpp - Expand integer division ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of 32-bit scalar integer division for
+// targets that don't have native support. It's largely derived from
+// compiler-rt's implementation of __udivsi3, but hand-tuned to reduce the
+// amount of control flow.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "integer-division"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
+
+using namespace llvm;
+
+/// Generate code to compute the remainder of two signed integers. Returns the
+/// remainder, which will have the sign of the dividend. Builder's insert point
+/// should be pointing where the caller wants code generated, e.g. at the srem
+/// instruction. This will generate a urem in the process, and Builder's insert
+/// point will be pointing at the urem (if present, i.e. not folded), ready to
+/// be expanded if the user wishes.
+static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ ConstantInt *ThirtyOne = Builder.getInt32(31);
+
+ // ; %dividend_sgn = ashr i32 %dividend, 31
+ // ; %divisor_sgn = ashr i32 %divisor, 31
+ // ; %dvd_xor = xor i32 %dividend, %dividend_sgn
+ // ; %dvs_xor = xor i32 %divisor, %divisor_sgn
+ // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn
+ // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn
+ // ; %urem = urem i32 %dividend, %divisor
+ // ; %xored = xor i32 %urem, %dividend_sgn
+ // ; %srem = sub i32 %xored, %dividend_sgn
+ Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne);
+ Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne);
+ Value *DvdXor = Builder.CreateXor(Dividend, DividendSign);
+ Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign);
+ Value *UDividend = Builder.CreateSub(DvdXor, DividendSign);
+ Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign);
+ Value *URem = Builder.CreateURem(UDividend, UDivisor);
+ Value *Xored = Builder.CreateXor(URem, DividendSign);
+ Value *SRem = Builder.CreateSub(Xored, DividendSign);
+
+ if (Instruction *URemInst = dyn_cast<Instruction>(URem))
+ Builder.SetInsertPoint(URemInst);
+
+ return SRem;
+}
+
+
+/// Generate code to compute the remainder of two unsigned integers. Returns the
+/// remainder. Builder's insert point should be pointing where the caller wants
+/// code generated, e.g. at the urem instruction. This will generate a udiv in
+/// the process, and Builder's insert point will be pointing at the udiv (if
+/// present, i.e. not folded), ready to be expanded if the user wishes.
+static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // Remainder = Dividend - Quotient*Divisor
+
+ // ; %quotient = udiv i32 %dividend, %divisor
+ // ; %product = mul i32 %divisor, %quotient
+ // ; %remainder = sub i32 %dividend, %product
+ Value *Quotient = Builder.CreateUDiv(Dividend, Divisor);
+ Value *Product = Builder.CreateMul(Divisor, Quotient);
+ Value *Remainder = Builder.CreateSub(Dividend, Product);
+
+ if (Instruction *UDiv = dyn_cast<Instruction>(Quotient))
+ Builder.SetInsertPoint(UDiv);
+
+ return Remainder;
+}
+
+/// Generate code to divide two signed integers. Returns the quotient, rounded
+/// towards 0. Builder's insert point should be pointing where the caller wants
+/// code generated, e.g. at the sdiv instruction. This will generate a udiv in
+/// the process, and Builder's insert point will be pointing at the udiv (if
+/// present, i.e. not folded), ready to be expanded if the user wishes.
+static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // Implementation taken from compiler-rt's __divsi3
+
+ ConstantInt *ThirtyOne = Builder.getInt32(31);
+
+ // ; %tmp = ashr i32 %dividend, 31
+ // ; %tmp1 = ashr i32 %divisor, 31
+ // ; %tmp2 = xor i32 %tmp, %dividend
+ // ; %u_dvnd = sub nsw i32 %tmp2, %tmp
+ // ; %tmp3 = xor i32 %tmp1, %divisor
+ // ; %u_dvsr = sub nsw i32 %tmp3, %tmp1
+ // ; %q_sgn = xor i32 %tmp1, %tmp
+ // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr
+ // ; %tmp4 = xor i32 %q_mag, %q_sgn
+ // ; %q = sub i32 %tmp4, %q_sgn
+ Value *Tmp = Builder.CreateAShr(Dividend, ThirtyOne);
+ Value *Tmp1 = Builder.CreateAShr(Divisor, ThirtyOne);
+ Value *Tmp2 = Builder.CreateXor(Tmp, Dividend);
+ Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp);
+ Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor);
+ Value *U_Dvsr = Builder.CreateSub(Tmp3, Tmp1);
+ Value *Q_Sgn = Builder.CreateXor(Tmp1, Tmp);
+ Value *Q_Mag = Builder.CreateUDiv(U_Dvnd, U_Dvsr);
+ Value *Tmp4 = Builder.CreateXor(Q_Mag, Q_Sgn);
+ Value *Q = Builder.CreateSub(Tmp4, Q_Sgn);
+
+ if (Instruction *UDiv = dyn_cast<Instruction>(Q_Mag))
+ Builder.SetInsertPoint(UDiv);
+
+ return Q;
+}
+
+/// Generates code to divide two unsigned scalar 32-bit integers. Returns the
+/// quotient, rounded towards 0. Builder's insert point should be pointing where
+/// the caller wants code generated, e.g. at the udiv instruction.
+static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // The basic algorithm can be found in the compiler-rt project's
+ // implementation of __udivsi3.c. Here, we do a lower-level IR based approach
+ // that's been hand-tuned to lessen the amount of control flow involved.
+
+ // Some helper values
+ IntegerType *I32Ty = Builder.getInt32Ty();
+
+ ConstantInt *Zero = Builder.getInt32(0);
+ ConstantInt *One = Builder.getInt32(1);
+ ConstantInt *ThirtyOne = Builder.getInt32(31);
+ ConstantInt *NegOne = ConstantInt::getSigned(I32Ty, -1);
+ ConstantInt *True = Builder.getTrue();
+
+ BasicBlock *IBB = Builder.GetInsertBlock();
+ Function *F = IBB->getParent();
+ Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ I32Ty);
+
+ // Our CFG is going to look like:
+ // +---------------------+
+ // | special-cases |
+ // | ... |
+ // +---------------------+
+ // | |
+ // | +----------+
+ // | | bb1 |
+ // | | ... |
+ // | +----------+
+ // | | |
+ // | | +------------+
+ // | | | preheader |
+ // | | | ... |
+ // | | +------------+
+ // | | |
+ // | | | +---+
+ // | | | | |
+ // | | +------------+ |
+ // | | | do-while | |
+ // | | | ... | |
+ // | | +------------+ |
+ // | | | | |
+ // | +-----------+ +---+
+ // | | loop-exit |
+ // | | ... |
+ // | +-----------+
+ // | |
+ // +-------+
+ // | ... |
+ // | end |
+ // +-------+
+ BasicBlock *SpecialCases = Builder.GetInsertBlock();
+ SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases"));
+ BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(),
+ "udiv-end");
+ BasicBlock *LoopExit = BasicBlock::Create(Builder.getContext(),
+ "udiv-loop-exit", F, End);
+ BasicBlock *DoWhile = BasicBlock::Create(Builder.getContext(),
+ "udiv-do-while", F, End);
+ BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(),
+ "udiv-preheader", F, End);
+ BasicBlock *BB1 = BasicBlock::Create(Builder.getContext(),
+ "udiv-bb1", F, End);
+
+ // We'll be overwriting the terminator to insert our extra blocks
+ SpecialCases->getTerminator()->eraseFromParent();
+
+ // First off, check for special cases: dividend or divisor is zero, divisor
+ // is greater than dividend, and divisor is 1.
+ // ; special-cases:
+ // ; %ret0_1 = icmp eq i32 %divisor, 0
+ // ; %ret0_2 = icmp eq i32 %dividend, 0
+ // ; %ret0_3 = or i1 %ret0_1, %ret0_2
+ // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true)
+ // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true)
+ // ; %sr = sub nsw i32 %tmp0, %tmp1
+ // ; %ret0_4 = icmp ugt i32 %sr, 31
+ // ; %ret0 = or i1 %ret0_3, %ret0_4
+ // ; %retDividend = icmp eq i32 %sr, 31
+ // ; %retVal = select i1 %ret0, i32 0, i32 %dividend
+ // ; %earlyRet = or i1 %ret0, %retDividend
+ // ; br i1 %earlyRet, label %end, label %bb1
+ Builder.SetInsertPoint(SpecialCases);
+ Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero);
+ Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero);
+ Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2);
+ Value *Tmp0 = Builder.CreateCall2(CTLZi32, Divisor, True);
+ Value *Tmp1 = Builder.CreateCall2(CTLZi32, Dividend, True);
+ Value *SR = Builder.CreateSub(Tmp0, Tmp1);
+ Value *Ret0_4 = Builder.CreateICmpUGT(SR, ThirtyOne);
+ Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4);
+ Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne);
+ Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend);
+ Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend);
+ Builder.CreateCondBr(EarlyRet, End, BB1);
+
+ // ; bb1: ; preds = %special-cases
+ // ; %sr_1 = add i32 %sr, 1
+ // ; %tmp2 = sub i32 31, %sr
+ // ; %q = shl i32 %dividend, %tmp2
+ // ; %skipLoop = icmp eq i32 %sr_1, 0
+ // ; br i1 %skipLoop, label %loop-exit, label %preheader
+ Builder.SetInsertPoint(BB1);
+ Value *SR_1 = Builder.CreateAdd(SR, One);
+ Value *Tmp2 = Builder.CreateSub(ThirtyOne, SR);
+ Value *Q = Builder.CreateShl(Dividend, Tmp2);
+ Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero);
+ Builder.CreateCondBr(SkipLoop, LoopExit, Preheader);
+
+ // ; preheader: ; preds = %bb1
+ // ; %tmp3 = lshr i32 %dividend, %sr_1
+ // ; %tmp4 = add i32 %divisor, -1
+ // ; br label %do-while
+ Builder.SetInsertPoint(Preheader);
+ Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1);
+ Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne);
+ Builder.CreateBr(DoWhile);
+
+ // ; do-while: ; preds = %do-while, %preheader
+ // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
+ // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
+ // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
+ // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
+ // ; %tmp5 = shl i32 %r_1, 1
+ // ; %tmp6 = lshr i32 %q_2, 31
+ // ; %tmp7 = or i32 %tmp5, %tmp6
+ // ; %tmp8 = shl i32 %q_2, 1
+ // ; %q_1 = or i32 %carry_1, %tmp8
+ // ; %tmp9 = sub i32 %tmp4, %tmp7
+ // ; %tmp10 = ashr i32 %tmp9, 31
+ // ; %carry = and i32 %tmp10, 1
+ // ; %tmp11 = and i32 %tmp10, %divisor
+ // ; %r = sub i32 %tmp7, %tmp11
+ // ; %sr_2 = add i32 %sr_3, -1
+ // ; %tmp12 = icmp eq i32 %sr_2, 0
+ // ; br i1 %tmp12, label %loop-exit, label %do-while
+ Builder.SetInsertPoint(DoWhile);
+ PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *SR_3 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *R_1 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *Q_2 = Builder.CreatePHI(I32Ty, 2);
+ Value *Tmp5 = Builder.CreateShl(R_1, One);
+ Value *Tmp6 = Builder.CreateLShr(Q_2, ThirtyOne);
+ Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6);
+ Value *Tmp8 = Builder.CreateShl(Q_2, One);
+ Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8);
+ Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7);
+ Value *Tmp10 = Builder.CreateAShr(Tmp9, 31);
+ Value *Carry = Builder.CreateAnd(Tmp10, One);
+ Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor);
+ Value *R = Builder.CreateSub(Tmp7, Tmp11);
+ Value *SR_2 = Builder.CreateAdd(SR_3, NegOne);
+ Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero);
+ Builder.CreateCondBr(Tmp12, LoopExit, DoWhile);
+
+ // ; loop-exit: ; preds = %do-while, %bb1
+ // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
+ // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
+ // ; %tmp13 = shl i32 %q_3, 1
+ // ; %q_4 = or i32 %carry_2, %tmp13
+ // ; br label %end
+ Builder.SetInsertPoint(LoopExit);
+ PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *Q_3 = Builder.CreatePHI(I32Ty, 2);
+ Value *Tmp13 = Builder.CreateShl(Q_3, One);
+ Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13);
+ Builder.CreateBr(End);
+
+ // ; end: ; preds = %loop-exit, %special-cases
+ // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
+ // ; ret i32 %q_5
+ Builder.SetInsertPoint(End, End->begin());
+ PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2);
+
+ // Populate the Phis, since all values have now been created. Our Phis were:
+ // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
+ Carry_1->addIncoming(Zero, Preheader);
+ Carry_1->addIncoming(Carry, DoWhile);
+ // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
+ SR_3->addIncoming(SR_1, Preheader);
+ SR_3->addIncoming(SR_2, DoWhile);
+ // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
+ R_1->addIncoming(Tmp3, Preheader);
+ R_1->addIncoming(R, DoWhile);
+ // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
+ Q_2->addIncoming(Q, Preheader);
+ Q_2->addIncoming(Q_1, DoWhile);
+ // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
+ Carry_2->addIncoming(Zero, BB1);
+ Carry_2->addIncoming(Carry, DoWhile);
+ // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
+ Q_3->addIncoming(Q, BB1);
+ Q_3->addIncoming(Q_1, DoWhile);
+ // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
+ Q_5->addIncoming(Q_4, LoopExit);
+ Q_5->addIncoming(RetVal, SpecialCases);
+
+ return Q_5;
+}
+
+/// Generate code to calculate the remainder of two integers, replacing Rem with
+/// the generated code. This currently generates code using the udiv expansion,
+/// but future work includes generating more specialized code, e.g. when more
+/// information about the operands is known. This currently only handles
+/// 32-bit scalar operands (a limitation inherited from the udiv expansion),
+/// but future work will remove this limitation.
+///
+/// @brief Replace Rem with generated code.
+bool llvm::expandRemainder(BinaryOperator *Rem) {
+ assert((Rem->getOpcode() == Instruction::SRem ||
+ Rem->getOpcode() == Instruction::URem) &&
+ "Trying to expand remainder from a non-remainder function");
+
+ IRBuilder<> Builder(Rem);
+
+ // First prepare the sign if it's a signed remainder
+ if (Rem->getOpcode() == Instruction::SRem) {
+ Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0),
+ Rem->getOperand(1), Builder);
+
+ Rem->replaceAllUsesWith(Remainder);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ // If we didn't actually generate a udiv instruction, we're done
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+ if (!BO || BO->getOpcode() != Instruction::URem)
+ return true;
+
+ Rem = BO;
+ }
+
+ Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0),
+ Rem->getOperand(1),
+ Builder);
+
+ Rem->replaceAllUsesWith(Remainder);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ // Expand the udiv
+ if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) {
+ assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?");
+ expandDivision(UDiv);
+ }
+
+ return true;
+}
+
+
+/// Generate code to divide two integers, replacing Div with the generated
+/// code. This currently generates code similarly to compiler-rt's
+/// implementations, but future work includes generating more specialized code
+/// when more information about the operands is known. Currently this only
+/// implements 32-bit scalar division, but future work will remove this
+/// limitation.
+///
+/// @brief Replace Div with generated code.
+bool llvm::expandDivision(BinaryOperator *Div) {
+ assert((Div->getOpcode() == Instruction::SDiv ||
+ Div->getOpcode() == Instruction::UDiv) &&
+ "Trying to expand division from a non-division function");
+
+ IRBuilder<> Builder(Div);
+
+ if (Div->getType()->isVectorTy())
+ llvm_unreachable("Div over vectors not supported");
+
+ // First prepare the sign if it's a signed division
+ if (Div->getOpcode() == Instruction::SDiv) {
+ // Lower the code to unsigned division, and reset Div to point to the udiv.
+ Value *Quotient = generateSignedDivisionCode(Div->getOperand(0),
+ Div->getOperand(1), Builder);
+ Div->replaceAllUsesWith(Quotient);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ // If we didn't actually generate a udiv instruction, we're done
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+ if (!BO || BO->getOpcode() != Instruction::UDiv)
+ return true;
+
+ Div = BO;
+ }
+
+ // Insert the unsigned division code
+ Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0),
+ Div->getOperand(1),
+ Builder);
+ Div->replaceAllUsesWith(Quotient);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ return true;
+}
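Both entry points above rewrite a single div/rem instruction in place and may
split the containing block. A hypothetical driver (not part of this patch;
names invented) that expands every 32-bit scalar div/rem in a function could
collect the candidates first and then expand them:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Function.h"
    #include "llvm/Instructions.h"
    #include "llvm/Transforms/Utils/IntegerDivision.h"
    using namespace llvm;

    static bool expandAllDivRem(Function &F) {
      // Collect first: expansion erases the original instruction and splits
      // blocks, so we do not expand while walking the instruction lists.
      SmallVector<BinaryOperator*, 8> Worklist;
      for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
        for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I)
          if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&*I))
            if (BO->getType()->isIntegerTy(32))   // current scalar 32-bit limit
              if (BO->getOpcode() == Instruction::UDiv ||
                  BO->getOpcode() == Instruction::SDiv ||
                  BO->getOpcode() == Instruction::URem ||
                  BO->getOpcode() == Instruction::SRem)
                Worklist.push_back(BO);

      bool Changed = false;
      for (unsigned i = 0, e = Worklist.size(); i != e; ++i) {
        unsigned Op = Worklist[i]->getOpcode();
        if (Op == Instruction::UDiv || Op == Instruction::SDiv)
          Changed |= expandDivision(Worklist[i]);
        else
          Changed |= expandRemainder(Worklist[i]);  // may expand a nested udiv
      }
      return Changed;
    }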
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index b654111..5e05c83 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -53,6 +53,8 @@ namespace {
// Cached analysis information for the current function.
DominatorTree *DT;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
std::vector<BasicBlock*> LoopBlocks;
PredIteratorCache PredCache;
Loop *L;
@@ -117,6 +119,8 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
L = TheLoop;
DT = &getAnalysis<DominatorTree>();
+ LI = &getAnalysis<LoopInfo>();
+ SE = getAnalysisIfAvailable<ScalarEvolution>();
// Get the set of exiting blocks.
SmallVector<BasicBlock*, 8> ExitBlocks;
@@ -156,6 +160,12 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
MadeChange |= ProcessInstruction(I, ExitBlocks);
}
}
+
+ // If we modified the code, remove any caches about the loop from SCEV to
+ // avoid dangling entries.
+ // FIXME: This is a big hammer, can we clear the cache more selectively?
+ if (SE && MadeChange)
+ SE->forgetLoop(L);
assert(L->isLCSSAForm(*DT));
PredCache.clear();
@@ -245,7 +255,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
// Remember that this phi makes the value alive in this block.
SSAUpdate.AddAvailableValue(ExitBB, PN);
}
-
+
// Rewrite all uses outside the loop in terms of the new PHIs we just
// inserted.
for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
@@ -260,6 +270,9 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
if (isa<PHINode>(UserBB->begin()) &&
isExitBlock(UserBB, ExitBlocks)) {
+ // Tell the VHs that the uses changed. This updates SCEV's caches.
+ if (UsesToRewrite[i]->get()->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin());
UsesToRewrite[i]->set(UserBB->begin());
continue;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index bed7d72..a954d82 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -23,6 +23,7 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Intrinsics.h"
+#include "llvm/MDBuilder.h"
#include "llvm/Metadata.h"
#include "llvm/Operator.h"
#include "llvm/ADT/DenseMap.h"
@@ -38,7 +39,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -52,7 +53,8 @@ using namespace llvm;
/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch
/// conditions and indirectbr addresses this might make dead if
/// DeleteDeadConditions is true.
-bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
+bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
+ const TargetLibraryInfo *TLI) {
TerminatorInst *T = BB->getTerminator();
IRBuilder<> Builder(T);
@@ -96,7 +98,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
Value *Cond = BI->getCondition();
BI->eraseFromParent();
if (DeleteDeadConditions)
- RecursivelyDeleteTriviallyDeadInstructions(Cond);
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
return true;
}
return false;
@@ -121,6 +123,27 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
// Check to see if this branch is going to the same place as the default
// dest. If so, eliminate it as an explicit compare.
if (i.getCaseSuccessor() == DefaultDest) {
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ // MD should have 2 + NumCases operands.
+ if (MD && MD->getNumOperands() == 2 + SI->getNumCases()) {
+ // Collect branch weights into a vector.
+ SmallVector<uint32_t, 8> Weights;
+ for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
+ ++MD_i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i));
+ assert(CI);
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
+ // Merge weight of this case to the default weight.
+ unsigned idx = i.getCaseIndex();
+ Weights[0] += Weights[idx+1];
+ // Remove weight for this case.
+ std::swap(Weights[idx+1], Weights.back());
+ Weights.pop_back();
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(Weights));
+ }
// Remove this entry.
DefaultDest->removePredecessor(SI->getParent());
SI->removeCase(i);
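The metadata handling added in this hunk keeps !prof branch weights consistent:
when a case that already branches to the default destination is removed, its
weight is folded into the default's slot (index 0) and the vector is compacted
with a swap-and-pop. A small self-contained sketch of that arithmetic (element
type simplified, values invented):

    #include "llvm/ADT/SmallVector.h"
    #include <algorithm>
    using namespace llvm;

    // Weights[0] is the default destination's weight; Weights[CaseIdx + 1]
    // belongs to the case being removed.
    static void foldCaseWeightIntoDefault(SmallVectorImpl<unsigned> &Weights,
                                          unsigned CaseIdx) {
      Weights[0] += Weights[CaseIdx + 1];
      std::swap(Weights[CaseIdx + 1], Weights.back());
      Weights.pop_back();
    }
    // e.g. {10, 3, 7} (default, case0, case1) with CaseIdx == 0 becomes
    // {13, 7}: the default absorbs the removed case's weight and the
    // remaining case keeps its own.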
@@ -161,7 +184,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
Value *Cond = SI->getCondition();
SI->eraseFromParent();
if (DeleteDeadConditions)
- RecursivelyDeleteTriviallyDeadInstructions(Cond);
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
return true;
}
@@ -177,8 +200,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
"cond");
// Insert the new branch.
- Builder.CreateCondBr(Cond, FirstCase.getCaseSuccessor(),
- SI->getDefaultDest());
+ BranchInst *NewBr = Builder.CreateCondBr(Cond,
+ FirstCase.getCaseSuccessor(),
+ SI->getDefaultDest());
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ if (MD && MD->getNumOperands() == 3) {
+ ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2));
+ ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1));
+ assert(SICase && SIDef);
+ // The TrueWeight should be the weight for the single case of SI.
+ NewBr->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(SICase->getValue().getZExtValue(),
+ SIDef->getValue().getZExtValue()));
+ }
// Delete the old switch.
SI->eraseFromParent();
@@ -205,7 +240,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
Value *Address = IBI->getAddress();
IBI->eraseFromParent();
if (DeleteDeadConditions)
- RecursivelyDeleteTriviallyDeadInstructions(Address);
+ RecursivelyDeleteTriviallyDeadInstructions(Address, TLI);
// If we didn't find our destination in the IBI successor list, then we
// have undefined behavior. Replace the unconditional branch with an
@@ -230,7 +265,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
/// isInstructionTriviallyDead - Return true if the result produced by the
/// instruction is not used, and the instruction has no side effects.
///
-bool llvm::isInstructionTriviallyDead(Instruction *I) {
+bool llvm::isInstructionTriviallyDead(Instruction *I,
+ const TargetLibraryInfo *TLI) {
if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
// We don't want the landingpad instruction removed by anything this general.
@@ -265,9 +301,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
return isa<UndefValue>(II->getArgOperand(1));
}
- if (isAllocLikeFn(I)) return true;
+ if (isAllocLikeFn(I, TLI)) return true;
- if (CallInst *CI = isFreeCall(I))
+ if (CallInst *CI = isFreeCall(I, TLI))
if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
return C->isNullValue() || isa<UndefValue>(C);
@@ -278,9 +314,11 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
/// trivially dead instruction, delete it. If that makes any of its operands
/// trivially dead, delete them too, recursively. Return true if any
/// instructions were deleted.
-bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
+bool
+llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
+ const TargetLibraryInfo *TLI) {
Instruction *I = dyn_cast<Instruction>(V);
- if (!I || !I->use_empty() || !isInstructionTriviallyDead(I))
+ if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI))
return false;
SmallVector<Instruction*, 16> DeadInsts;
@@ -301,7 +339,7 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
// operand, and if it is 'trivially' dead, delete it in a future loop
// iteration.
if (Instruction *OpI = dyn_cast<Instruction>(OpV))
- if (isInstructionTriviallyDead(OpI))
+ if (isInstructionTriviallyDead(OpI, TLI))
DeadInsts.push_back(OpI);
}
@@ -334,19 +372,20 @@ static bool areAllUsesEqual(Instruction *I) {
/// either forms a cycle or is terminated by a trivially dead instruction,
/// delete it. If that makes any of its operands trivially dead, delete them
/// too, recursively. Return true if a change was made.
-bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
+ const TargetLibraryInfo *TLI) {
SmallPtrSet<Instruction*, 4> Visited;
for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects();
I = cast<Instruction>(*I->use_begin())) {
if (I->use_empty())
- return RecursivelyDeleteTriviallyDeadInstructions(I);
+ return RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
// If we find an instruction more than once, we're on a cycle that
// won't prove fruitful.
if (!Visited.insert(I)) {
// Break the cycle and delete the instruction and its operands.
I->replaceAllUsesWith(UndefValue::get(I->getType()));
- (void)RecursivelyDeleteTriviallyDeadInstructions(I);
+ (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
return true;
}
}
@@ -358,7 +397,8 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
///
/// This returns true if it changed the code, note that it can delete
/// instructions in other blocks as well in this block.
-bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
bool MadeChange = false;
#ifndef NDEBUG
@@ -381,7 +421,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
continue;
}
- MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
if (BIHandle != BI)
BI = BB->begin();
}
@@ -405,7 +445,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
/// .. and delete the predecessor corresponding to the '1', this will attempt to
/// recursively fold the and to 0.
void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
- TargetData *TD) {
+ DataLayout *TD) {
// This only adjusts blocks with PHI nodes.
if (!isa<PHINode>(BB->begin()))
return;
@@ -720,7 +760,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
/// their preferred alignment from the beginning.
///
static unsigned enforceKnownAlignment(Value *V, unsigned Align,
- unsigned PrefAlign, const TargetData *TD) {
+ unsigned PrefAlign, const DataLayout *TD) {
V = V->stripPointerCasts();
if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
@@ -763,7 +803,7 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
/// and it is more than the alignment of the ultimate object, see if we can
/// increase the alignment of the ultimate object, making this check succeed.
unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
- const TargetData *TD) {
+ const DataLayout *TD) {
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 0bc185d..9d9e201 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -46,6 +46,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
@@ -89,6 +90,7 @@ namespace {
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DependenceAnalysis>();
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
}
@@ -194,6 +196,11 @@ ReprocessLoop:
BI->setCondition(ConstantInt::get(Cond->getType(),
!L->contains(BI->getSuccessor(0))));
+
+ // This may make the loop analyzable, force SCEV recomputation.
+ if (SE)
+ SE->forgetLoop(L);
+
Changed = true;
}
}
diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
new file mode 100644
index 0000000..233bc12
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -0,0 +1,132 @@
+//===- MetaRenamer.cpp - Rename everything with metasyntactic names -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass renames everything with metasyntactic names. The intent is to use
+// this pass after bugpoint reduction to conceal the nature of the original
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/TypeFinder.h"
+
+using namespace llvm;
+
+namespace {
+
+ // This PRNG is from the ISO C spec. It is intentionally simple and
+ // unsuitable for cryptographic use. We're just looking for enough
+ // variety to surprise and delight users.
+ struct PRNG {
+ unsigned long next;
+
+ void srand(unsigned int seed) {
+ next = seed;
+ }
+
+ int rand(void) {
+ next = next * 1103515245 + 12345;
+ return (unsigned int)(next / 65536) % 32768;
+ }
+ };
+
+ struct MetaRenamer : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ MetaRenamer() : ModulePass(ID) {
+ initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnModule(Module &M) {
+ static const char *metaNames[] = {
+ // See http://en.wikipedia.org/wiki/Metasyntactic_variable
+ "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
+ "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
+ };
+
+ // Seed our PRNG with simple additive sum of ModuleID. We're looking to
+ // simply avoid always having the same function names, and we need to
+ // remain deterministic.
+ unsigned int randSeed = 0;
+ for (std::string::const_iterator I = M.getModuleIdentifier().begin(),
+ E = M.getModuleIdentifier().end(); I != E; ++I)
+ randSeed += *I;
+
+ PRNG prng;
+ prng.srand(randSeed);
+
+ // Rename all aliases
+ for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end();
+ AI != AE; ++AI)
+ AI->setName("alias");
+
+ // Rename all global variables
+ for (Module::global_iterator GI = M.global_begin(), GE = M.global_end();
+ GI != GE; ++GI)
+ GI->setName("global");
+
+ // Rename all struct types
+ TypeFinder StructTypes;
+ StructTypes.run(M, true);
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+ StructType *STy = StructTypes[i];
+ if (STy->isLiteral() || STy->getName().empty()) continue;
+
+ SmallString<128> NameStorage;
+ STy->setName((Twine("struct.") + metaNames[prng.rand() %
+ array_lengthof(metaNames)]).toStringRef(NameStorage));
+ }
+
+ // Rename all functions
+ for (Module::iterator FI = M.begin(), FE = M.end();
+ FI != FE; ++FI) {
+ FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]);
+ runOnFunction(*FI);
+ }
+ return true;
+ }
+
+ bool runOnFunction(Function &F) {
+ for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
+ AI != AE; ++AI)
+ if (!AI->getType()->isVoidTy())
+ AI->setName("arg");
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ BB->setName("bb");
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (!I->getType()->isVoidTy())
+ I->setName("tmp");
+ }
+ return true;
+ }
+ };
+}
+
+char MetaRenamer::ID = 0;
+INITIALIZE_PASS(MetaRenamer, "metarenamer",
+ "Assign new names to everything", false, false)
+//===----------------------------------------------------------------------===//
+//
+// MetaRenamer - Rename everything with metasyntactic names.
+//
+ModulePass *llvm::createMetaRenamerPass() {
+ return new MetaRenamer();
+}
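A hypothetical way to run the new pass programmatically (this driver is not
part of the patch, and it assumes createMetaRenamerPass() is declared in
llvm/Transforms/IPO.h, which the file above includes):

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO.h"
    using namespace llvm;

    static void scrubNames(Module &M) {
      PassManager PM;
      PM.add(createMetaRenamerPass());
      PM.run(M);   // renames aliases, globals, named struct types and functions
    }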
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index dd5e20e..558de9d 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -212,9 +212,13 @@ namespace {
///
DenseMap<AllocaInst*, unsigned> AllocaLookup;
- /// NewPhiNodes - The PhiNodes we're adding.
+ /// NewPhiNodes - The PhiNodes we're adding. That map is used to simplify
+ /// some Phi nodes as we iterate over it, so it should have deterministic
+ /// iterators. We could use a MapVector, but since we already maintain a
+ /// map from BasicBlock* to a stable numbering (BBNumbers), the DenseMap is
+ /// more efficient (also supports removal).
///
- DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;
+ DenseMap<std::pair<unsigned, unsigned>, PHINode*> NewPhiNodes;
/// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
/// it corresponds to.
@@ -588,7 +592,11 @@ void PromoteMem2Reg::run() {
while (EliminatedAPHI) {
EliminatedAPHI = false;
- for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+ // Iterating over NewPhiNodes is deterministic, so it is safe to try to
+ // simplify and RAUW them as we go. If it was not, we could add uses to
+ // the values we replace with in a non deterministic order, thus creating
+ // non deterministic def->use chains.
+ for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I =
NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
PHINode *PN = I->second;
@@ -612,7 +620,7 @@ void PromoteMem2Reg::run() {
// have incoming values for all predecessors. Loop over all PHI nodes we have
// created, inserting undef values if they are missing any incoming values.
//
- for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+ for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I =
NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
// We want to do this once per basic block. As such, only process a block
// when we find the PHI that is the first entry in the block.
@@ -992,7 +1000,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
unsigned &Version) {
// Look up the basic-block in question.
- PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];
+ PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];
// If the BB already has a phi node added for the i'th alloca then we're done!
if (PN) return false;
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index e568a61..72d4199 100644
--- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -39,7 +39,7 @@ SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
: AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
- delete &getAvailableVals(AV);
+ delete static_cast<AvailableValsTy*>(AV);
}
/// Initialize - Reset this object to get ready for a new set of SSA
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 518df7c..c767da6 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "simplifycfg"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IRBuilder.h"
@@ -22,6 +23,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/MDBuilder.h"
#include "llvm/Metadata.h"
+#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
@@ -38,7 +40,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/NoFolder.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <set>
@@ -53,6 +55,13 @@ static cl::opt<bool>
DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
cl::desc("Duplicate return instructions into unconditional branches"));
+static cl::opt<bool>
+SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
+ cl::desc("Sink common instructions down to the end block"));
+
+STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
+STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
+STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
namespace {
@@ -68,10 +77,13 @@ namespace {
// Comparing pointers is ok as we only rely on the order for uniquing.
return Value < RHS.Value;
}
+
+ bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
};
class SimplifyCFGOpt {
- const TargetData *const TD;
+ const DataLayout *const TD;
+ const TargetTransformInfo *const TTI;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
@@ -91,7 +103,8 @@ class SimplifyCFGOpt {
bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
public:
- explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {}
+ SimplifyCFGOpt(const DataLayout *td, const TargetTransformInfo *tti)
+ : TD(td), TTI(tti) {}
bool run(BasicBlock *BB);
};
}
@@ -101,14 +114,14 @@ public:
///
static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
if (SI1 == SI2) return false; // Can't merge with self!
-
+
// It is not safe to merge these two switch instructions if they have a common
// successor, and if that successor has a PHI node, and if *that* PHI node has
// conflicting incoming values from the two switch blocks.
BasicBlock *SI1BB = SI1->getParent();
BasicBlock *SI2BB = SI2->getParent();
SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
-
+
for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
if (SI1Succs.count(*I))
for (BasicBlock::iterator BBI = (*I)->begin();
@@ -118,7 +131,7 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
PN->getIncomingValueForBlock(SI2BB))
return false;
}
-
+
return true;
}
@@ -135,7 +148,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1,
assert(SI1->isUnconditional() && SI2->isConditional());
// We fold the unconditional branch if we can easily update all PHI nodes in
- // common successors:
+ // common successors:
// 1> We have a constant incoming value for the conditional branch;
// 2> We have "Cond" as the incoming value for the unconditional branch;
// 3> SI2->getCondition() and Cond have same operands.
@@ -170,7 +183,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1,
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
BasicBlock *ExistPred) {
if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
-
+
PHINode *PN;
for (BasicBlock::iterator I = Succ->begin();
(PN = dyn_cast<PHINode>(I)); ++I)
@@ -222,7 +235,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
// doesn't dominate BB.
if (Pred2->getSinglePredecessor() == 0)
return 0;
-
+
// If we found a conditional branch predecessor, make sure that it branches
// to BB and Pred2Br. If it doesn't, this isn't an "if statement".
if (Pred1Br->getSuccessor(0) == BB &&
@@ -252,7 +265,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
// Otherwise, if this is a conditional branch, then we can use it!
BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
if (BI == 0) return 0;
-
+
assert(BI->isConditional() && "Two successors but not conditional?");
if (BI->getSuccessor(0) == Pred1) {
IfTrue = Pred1;
@@ -345,7 +358,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// If we aren't allowing aggressive promotion anymore, then don't consider
// instructions in the 'if region'.
if (AggressiveInsts == 0) return false;
-
+
// If we have seen this instruction before, don't count it again.
if (AggressiveInsts->count(I)) return true;
@@ -374,7 +387,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
-static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) {
+static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) {
// Normal constant int.
ConstantInt *CI = dyn_cast<ConstantInt>(V);
if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
@@ -382,7 +395,7 @@ static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) {
// This is some kind of pointer constant. Turn it into a pointer-sized
// ConstantInt if possible.
- IntegerType *PtrTy = TD->getIntPtrType(V->getContext());
+ IntegerType *PtrTy = cast<IntegerType>(TD->getIntPtrType(V->getType()));
// Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
if (isa<ConstantPointerNull>(V))
@@ -408,10 +421,10 @@ static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) {
/// Values vector.
static Value *
GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
- const TargetData *TD, bool isEQ, unsigned &UsedICmps) {
+ const DataLayout *TD, bool isEQ, unsigned &UsedICmps) {
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return 0;
-
+
// If this is an icmp against a constant, handle this as one of the cases.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
@@ -420,21 +433,21 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
Vals.push_back(C);
return I->getOperand(0);
}
-
+
// If we have "x ult 3" comparison, for example, then we can add 0,1,2 to
// the set.
ConstantRange Span =
ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
-
+
// If this is an and/!= check then we want to optimize "x ugt 2" into
    // x != 0 && x != 1 && x != 2.
if (!isEQ)
Span = Span.inverse();
-
+
// If there are a ton of values, we don't want to make a ginormous switch.
if (Span.getSetSize().ugt(8) || Span.isEmptySet())
return 0;
-
+
for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
UsedICmps++;
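The range expansion above can be pictured with plain integers in place of ConstantRange and APInt. A minimal sketch (expandRange is an illustrative helper, not part of the patch):

#include <cstdint>
#include <cstdio>
#include <vector>

// "x ult 3" covers {0,1,2}; for the and/!= form the region is inverted
// first, so "x ugt 2" also yields {0,1,2} -- the values x must differ from.
std::vector<uint32_t> expandRange(uint32_t Lower, uint32_t Upper) {
  std::vector<uint32_t> Vals;
  for (uint32_t V = Lower; V != Upper; ++V) // mirrors the APInt loop above
    Vals.push_back(V);
  return Vals;
}

int main() {
  // makeICmpRegion(ult, 3) gives [0, 3); the inverse used for "x ugt 2"
  // in an and/!= chain is the same half-open range.
  for (uint32_t V : expandRange(0, 3))
    printf("%u ", (unsigned)V); // 0 1 2
  printf("\n");
  return 0;
}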
@@ -442,11 +455,11 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
}
return 0;
}
-
+
// Otherwise, we can only handle an | or &, depending on isEQ.
if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
return 0;
-
+
unsigned NumValsBeforeLHS = Vals.size();
unsigned UsedICmpsBeforeLHS = UsedICmps;
if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD,
@@ -467,12 +480,12 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
Extra = I->getOperand(1);
return LHS;
}
-
+
Vals.resize(NumValsBeforeLHS);
UsedICmps = UsedICmpsBeforeLHS;
return 0;
}
-
+
// If the LHS can't be folded in, but Extra is available and RHS can, try to
// use LHS as Extra.
if (Extra == 0 || Extra == I->getOperand(0)) {
@@ -484,7 +497,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
assert(Vals.size() == NumValsBeforeLHS);
Extra = OldExtra;
}
-
+
return 0;
}
@@ -556,11 +569,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
/// in the list that match the specified block.
static void EliminateBlockCases(BasicBlock *BB,
std::vector<ValueEqualityComparisonCase> &Cases) {
- for (unsigned i = 0, e = Cases.size(); i != e; ++i)
- if (Cases[i].Dest == BB) {
- Cases.erase(Cases.begin()+i);
- --i; --e;
- }
+ Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
}
/// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as
@@ -615,6 +624,9 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
assert(ThisVal && "This isn't a value comparison!!");
if (ThisVal != PredVal) return false; // Different predicates.
+ // TODO: Preserve branch weight metadata, similarly to how
+ // FoldValueComparisonIntoPredecessors preserves it.
+
// Find out information about when control will move from Pred to TI's block.
std::vector<ValueEqualityComparisonCase> PredCases;
BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(),
@@ -634,7 +646,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
// can simplify TI.
if (!ValuesOverlap(PredCases, ThisCases))
return false;
-
+
if (isa<BranchInst>(TI)) {
// Okay, one of the successors of this condbr is dead. Convert it to a
// uncond br.
@@ -652,7 +664,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
EraseTerminatorInstAndDCECond(TI);
return true;
}
-
+
SwitchInst *SI = cast<SwitchInst>(TI);
// Okay, TI has cases that are statically dead, prune them away.
SmallPtrSet<Constant*, 16> DeadCases;
@@ -662,18 +674,37 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI);
+ // Collect branch weights into a vector.
+ SmallVector<uint32_t, 8> Weights;
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases());
+ if (HasWeight)
+ for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
+ ++MD_i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i));
+ assert(CI);
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
--i;
if (DeadCases.count(i.getCaseValue())) {
+ if (HasWeight) {
+ std::swap(Weights[i.getCaseIndex()+1], Weights.back());
+ Weights.pop_back();
+ }
i.getCaseSuccessor()->removePredecessor(TI->getParent());
SI->removeCase(i);
}
}
+ if (HasWeight && Weights.size() >= 2)
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getParent()->getContext()).
+ createBranchWeights(Weights));
DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
}
-
+
// Otherwise, TI's block must correspond to some matched value. Find out
// which value (or set of values) this is.
ConstantInt *TIV = 0;
@@ -729,8 +760,8 @@ namespace {
}
static int ConstantIntSortPredicate(const void *P1, const void *P2) {
- const ConstantInt *LHS = *(const ConstantInt**)P1;
- const ConstantInt *RHS = *(const ConstantInt**)P2;
+ const ConstantInt *LHS = *(const ConstantInt*const*)P1;
+ const ConstantInt *RHS = *(const ConstantInt*const*)P2;
if (LHS->getValue().ult(RHS->getValue()))
return 1;
if (LHS->getValue() == RHS->getValue())
@@ -738,6 +769,56 @@ static int ConstantIntSortPredicate(const void *P1, const void *P2) {
return -1;
}
+static inline bool HasBranchWeights(const Instruction* I) {
+ MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof);
+ if (ProfMD && ProfMD->getOperand(0))
+ if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
+ return MDS->getString().equals("branch_weights");
+
+ return false;
+}
+
+/// Get the weights of a given TerminatorInst; the default weight is at the front
+/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
+/// metadata.
+static void GetBranchWeights(TerminatorInst *TI,
+ SmallVectorImpl<uint64_t> &Weights) {
+ MDNode* MD = TI->getMetadata(LLVMContext::MD_prof);
+ assert(MD);
+ for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i));
+ assert(CI);
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
+
+ // If TI is a conditional eq, the default case is the false case,
+ // and the corresponding branch-weight data is at index 2. We swap the
+ // default weight to be the first entry.
+ if (BranchInst* BI = dyn_cast<BranchInst>(TI)) {
+ assert(Weights.size() == 2);
+ ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(Weights.front(), Weights.back());
+ }
+}
+
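As a standalone illustration of the weight handling introduced here, the sketch below models the per-successor weights of a !prof node as a plain vector (normalizeWeights is a hypothetical name; the real code walks MDNode operands):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// SuccessorWeights lists the weights in !prof operand order (the leading
// "branch_weights" tag is omitted from this model).  For a two-way branch
// whose condition is an equality compare, the false successor plays the
// role of the default case, so its weight is moved to the front, as
// GetBranchWeights above does.
std::vector<uint64_t> normalizeWeights(std::vector<uint64_t> SuccessorWeights,
                                       bool CondIsEqualityCompare) {
  if (CondIsEqualityCompare) {
    assert(SuccessorWeights.size() == 2 && "two-way branch expected");
    std::swap(SuccessorWeights.front(), SuccessorWeights.back());
  }
  return SuccessorWeights;
}

int main() {
  // br i1 (icmp eq ...), %then, %else with weights {16, 4}: the default
  // (%else) weight 4 ends up first.
  for (uint64_t W : normalizeWeights({16, 4}, true))
    printf("%llu ", (unsigned long long)W); // 4 16
  printf("\n");
  return 0;
}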
+/// Sees if any of the weights are too big for a uint32_t, and halves all the
+/// weights if any are.
+static void FitWeights(MutableArrayRef<uint64_t> Weights) {
+ bool Halve = false;
+ for (unsigned i = 0; i < Weights.size(); ++i)
+ if (Weights[i] > UINT_MAX) {
+ Halve = true;
+ break;
+ }
+
+ if (! Halve)
+ return;
+
+ for (unsigned i = 0; i < Weights.size(); ++i)
+ Weights[i] /= 2;
+}
+
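A minimal standalone sketch of the same fit-to-32-bit step, using std::vector in place of MutableArrayRef (fitWeights is an illustrative name, not the LLVM helper):

#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

// If any weight no longer fits in a uint32_t, halve all of them once so the
// relative ratios survive re-emission as 32-bit branch weights.
void fitWeights(std::vector<uint64_t> &Weights) {
  bool TooBig = false;
  for (uint64_t W : Weights)
    if (W > std::numeric_limits<uint32_t>::max()) {
      TooBig = true;
      break;
    }
  if (!TooBig)
    return;
  for (uint64_t &W : Weights)
    W /= 2;
}

int main() {
  std::vector<uint64_t> W = {6000000000ULL, 2};
  fitWeights(W);
  printf("%llu %llu\n", (unsigned long long)W[0],
         (unsigned long long)W[1]); // 3000000000 1
  return 0;
}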
/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
/// equality comparison instruction (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value comparisons
@@ -770,6 +851,31 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// build.
SmallVector<BasicBlock*, 8> NewSuccessors;
+ // Update the branch weight metadata along the way
+ SmallVector<uint64_t, 8> Weights;
+ bool PredHasWeights = HasBranchWeights(PTI);
+ bool SuccHasWeights = HasBranchWeights(TI);
+
+ if (PredHasWeights) {
+ GetBranchWeights(PTI, Weights);
+    // branch-weight metadata is inconsistent here.
+ if (Weights.size() != 1 + PredCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (SuccHasWeights)
+ // If there are no predecessor weights but there are successor weights,
+ // populate Weights with 1, which will later be scaled to the sum of
+ // successor's weights
+ Weights.assign(1 + PredCases.size(), 1);
+
+ SmallVector<uint64_t, 8> SuccWeights;
+ if (SuccHasWeights) {
+ GetBranchWeights(TI, SuccWeights);
+    // branch-weight metadata is inconsistent here.
+ if (SuccWeights.size() != 1 + BBCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (PredHasWeights)
+ SuccWeights.assign(1 + BBCases.size(), 1);
+
if (PredDefault == BB) {
// If this is the default destination from PTI, only the edges in TI
// that don't occur in PTI, or that branch to BB will be activated.
@@ -780,6 +886,14 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
else {
// The default destination is BB, we don't need explicit targets.
std::swap(PredCases[i], PredCases.back());
+
+ if (PredHasWeights || SuccHasWeights) {
+ // Increase weight for the default case.
+ Weights[0] += Weights[i+1];
+ std::swap(Weights[i+1], Weights.back());
+ Weights.pop_back();
+ }
+
PredCases.pop_back();
--i; --e;
}
@@ -790,21 +904,47 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
PredDefault = BBDefault;
NewSuccessors.push_back(BBDefault);
}
+
+ unsigned CasesFromPred = Weights.size();
+ uint64_t ValidTotalSuccWeight = 0;
for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
if (!PTIHandled.count(BBCases[i].Value) &&
BBCases[i].Dest != BBDefault) {
PredCases.push_back(BBCases[i]);
NewSuccessors.push_back(BBCases[i].Dest);
+ if (SuccHasWeights || PredHasWeights) {
+ // The default weight is at index 0, so weight for the ith case
+ // should be at index i+1. Scale the cases from successor by
+ // PredDefaultWeight (Weights[0]).
+ Weights.push_back(Weights[0] * SuccWeights[i+1]);
+ ValidTotalSuccWeight += SuccWeights[i+1];
+ }
}
+ if (SuccHasWeights || PredHasWeights) {
+ ValidTotalSuccWeight += SuccWeights[0];
+ // Scale the cases from predecessor by ValidTotalSuccWeight.
+ for (unsigned i = 1; i < CasesFromPred; ++i)
+ Weights[i] *= ValidTotalSuccWeight;
+ // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
+ Weights[0] *= SuccWeights[0];
+ }
} else {
// If this is not the default destination from PSI, only the edges
// in SI that occur in PSI with a destination of BB will be
// activated.
std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ std::map<ConstantInt*, uint64_t> WeightsForHandled;
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
if (PredCases[i].Dest == BB) {
PTIHandled.insert(PredCases[i].Value);
+
+ if (PredHasWeights || SuccHasWeights) {
+ WeightsForHandled[PredCases[i].Value] = Weights[i+1];
+ std::swap(Weights[i+1], Weights.back());
+ Weights.pop_back();
+ }
+
std::swap(PredCases[i], PredCases.back());
PredCases.pop_back();
--i; --e;
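A worked numeric instance of the scaling performed in the PredDefault == BB path above, with invented weights and plain containers standing in for the LLVM types:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Predecessor switch: default -> BB with weight 3, one other case weight 5.
  // Successor switch inside BB: default weight 4, one case weight 6.
  std::vector<uint64_t> Weights     = {3, 5}; // [pred default, pred case]
  std::vector<uint64_t> SuccWeights = {4, 6}; // [succ default, succ case]

  // Cases taken over from the successor are only reachable through the
  // predecessor's default edge, so they are scaled by Weights[0].
  uint64_t ValidTotalSuccWeight = SuccWeights[1];
  Weights.push_back(Weights[0] * SuccWeights[1]);  // 3 * 6 = 18

  // Cases kept from the predecessor are scaled by the successor's total
  // weight; the merged default is scaled by the successor's default weight.
  ValidTotalSuccWeight += SuccWeights[0];          // 6 + 4 = 10
  Weights[1] *= ValidTotalSuccWeight;              // 5 * 10 = 50
  Weights[0] *= SuccWeights[0];                    // 3 * 4  = 12

  for (uint64_t W : Weights)
    printf("%llu ", (unsigned long long)W);        // 12 50 18
  printf("\n");
  return 0;
}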
@@ -815,6 +955,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
if (PTIHandled.count(BBCases[i].Value)) {
// If this is one we are capable of getting...
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[BBCases[i].Value]);
PredCases.push_back(BBCases[i]);
NewSuccessors.push_back(BBCases[i].Dest);
PTIHandled.erase(BBCases[i].Value);// This constant is taken care of
@@ -822,9 +964,11 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// If there are any constants vectored to BB that TI doesn't handle,
// they must go to the default destination of TI.
- for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
+ for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
PTIHandled.begin(),
E = PTIHandled.end(); I != E; ++I) {
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[*I]);
PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault));
NewSuccessors.push_back(BBDefault);
}
@@ -839,7 +983,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
Builder.SetInsertPoint(PTI);
// Convert pointer to int before we switch.
if (CV->getType()->isPointerTy()) {
- assert(TD && "Cannot switch on pointer without TargetData");
+ assert(TD && "Cannot switch on pointer without DataLayout");
CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()),
"magicptr");
}
@@ -851,6 +995,17 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
NewSI->addCase(PredCases[i].Value, PredCases[i].Dest);
+ if (PredHasWeights || SuccHasWeights) {
+    // Halve the weights if any of them cannot fit in a uint32_t
+ FitWeights(Weights);
+
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+
+ NewSI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(MDWeights));
+ }
+
EraseTerminatorInstAndDCECond(PTI);
// Okay, last check. If BB is still a successor of PSI, then we must
@@ -984,11 +1139,11 @@ HoistTerminator:
Value *BB1V = PN->getIncomingValueForBlock(BB1);
Value *BB2V = PN->getIncomingValueForBlock(BB2);
if (BB1V == BB2V) continue;
-
+
// These values do not agree. Insert a select instruction before NT
// that determines the right value.
SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
- if (SI == 0)
+ if (SI == 0)
SI = cast<SelectInst>
(Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
BB1V->getName()+"."+BB2V->getName()));
@@ -1008,6 +1163,175 @@ HoistTerminator:
return true;
}
+/// SinkThenElseCodeToEnd - Given an unconditional branch that goes to BBEnd,
+/// check whether BBEnd has only two predecessors and the other predecessor
+/// ends with an unconditional branch. If it is true, sink any common code
+/// in the two predecessors to BBEnd.
+static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
+ assert(BI1->isUnconditional());
+ BasicBlock *BB1 = BI1->getParent();
+ BasicBlock *BBEnd = BI1->getSuccessor(0);
+
+ // Check that BBEnd has two predecessors and the other predecessor ends with
+ // an unconditional branch.
+ pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd);
+ BasicBlock *Pred0 = *PI++;
+ if (PI == PE) // Only one predecessor.
+ return false;
+ BasicBlock *Pred1 = *PI++;
+ if (PI != PE) // More than two predecessors.
+ return false;
+ BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0;
+ BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator());
+ if (!BI2 || !BI2->isUnconditional())
+ return false;
+
+ // Gather the PHI nodes in BBEnd.
+ std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2;
+ Instruction *FirstNonPhiInBBEnd = 0;
+ for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN);
+ } else {
+ FirstNonPhiInBBEnd = &*I;
+ break;
+ }
+ }
+ if (!FirstNonPhiInBBEnd)
+ return false;
+
+
+ // This does very trivial matching, with limited scanning, to find identical
+ // instructions in the two blocks. We scan backward for obviously identical
+ // instructions in an identical order.
+ BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(),
+ RE1 = BB1->getInstList().rend(), RI2 = BB2->getInstList().rbegin(),
+ RE2 = BB2->getInstList().rend();
+ // Skip debug info.
+ while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+ if (RI1 == RE1)
+ return false;
+ while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+ if (RI2 == RE2)
+ return false;
+ // Skip the unconditional branches.
+ ++RI1;
+ ++RI2;
+
+ bool Changed = false;
+ while (RI1 != RE1 && RI2 != RE2) {
+ // Skip debug info.
+ while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+ if (RI1 == RE1)
+ return Changed;
+ while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+ if (RI2 == RE2)
+ return Changed;
+
+ Instruction *I1 = &*RI1, *I2 = &*RI2;
+ // I1 and I2 should have a single use in the same PHI node, and they
+ // perform the same operation.
+ // Cannot move control-flow-involving, volatile loads, vaarg, etc.
+ if (isa<PHINode>(I1) || isa<PHINode>(I2) ||
+ isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) ||
+ isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) ||
+ isa<AllocaInst>(I1) || isa<AllocaInst>(I2) ||
+ I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
+ I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
+ !I1->hasOneUse() || !I2->hasOneUse() ||
+ MapValueFromBB1ToBB2.find(I1) == MapValueFromBB1ToBB2.end() ||
+ MapValueFromBB1ToBB2[I1].first != I2)
+ return Changed;
+
+ // Check whether we should swap the operands of ICmpInst.
+ ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2);
+ bool SwapOpnds = false;
+ if (ICmp1 && ICmp2 &&
+ ICmp1->getOperand(0) != ICmp2->getOperand(0) &&
+ ICmp1->getOperand(1) != ICmp2->getOperand(1) &&
+ (ICmp1->getOperand(0) == ICmp2->getOperand(1) ||
+ ICmp1->getOperand(1) == ICmp2->getOperand(0))) {
+ ICmp2->swapOperands();
+ SwapOpnds = true;
+ }
+ if (!I1->isSameOperationAs(I2)) {
+ if (SwapOpnds)
+ ICmp2->swapOperands();
+ return Changed;
+ }
+
+ // The operands should be either the same or they need to be generated
+ // with a PHI node after sinking. We only handle the case where there is
+ // a single pair of different operands.
+ Value *DifferentOp1 = 0, *DifferentOp2 = 0;
+ unsigned Op1Idx = 0;
+ for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
+ if (I1->getOperand(I) == I2->getOperand(I))
+ continue;
+ // Early exit if we have more-than one pair of different operands or
+ // the different operand is already in MapValueFromBB1ToBB2.
+ // Early exit if we need a PHI node to replace a constant.
+ if (DifferentOp1 ||
+ MapValueFromBB1ToBB2.find(I1->getOperand(I)) !=
+ MapValueFromBB1ToBB2.end() ||
+ isa<Constant>(I1->getOperand(I)) ||
+ isa<Constant>(I2->getOperand(I))) {
+ // If we can't sink the instructions, undo the swapping.
+ if (SwapOpnds)
+ ICmp2->swapOperands();
+ return Changed;
+ }
+ DifferentOp1 = I1->getOperand(I);
+ Op1Idx = I;
+ DifferentOp2 = I2->getOperand(I);
+ }
+
+ // We insert the pair of different operands to MapValueFromBB1ToBB2 and
+ // remove (I1, I2) from MapValueFromBB1ToBB2.
+ if (DifferentOp1) {
+ PHINode *NewPN = PHINode::Create(DifferentOp1->getType(), 2,
+ DifferentOp1->getName() + ".sink",
+ BBEnd->begin());
+ MapValueFromBB1ToBB2[DifferentOp1] = std::make_pair(DifferentOp2, NewPN);
+ // I1 should use NewPN instead of DifferentOp1.
+ I1->setOperand(Op1Idx, NewPN);
+ NewPN->addIncoming(DifferentOp1, BB1);
+ NewPN->addIncoming(DifferentOp2, BB2);
+ DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
+ }
+ PHINode *OldPN = MapValueFromBB1ToBB2[I1].second;
+ MapValueFromBB1ToBB2.erase(I1);
+
+ DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n";);
+ DEBUG(dbgs() << " " << *I2 << "\n";);
+ // We need to update RE1 and RE2 if we are going to sink the first
+ // instruction in the basic block down.
+ bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin());
+ // Sink the instruction.
+ BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1);
+ if (!OldPN->use_empty())
+ OldPN->replaceAllUsesWith(I1);
+ OldPN->eraseFromParent();
+
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->intersectOptionalDataWith(I2);
+ I2->eraseFromParent();
+
+ if (UpdateRE1)
+ RE1 = BB1->getInstList().rend();
+ if (UpdateRE2)
+ RE2 = BB2->getInstList().rend();
+ FirstNonPhiInBBEnd = I1;
+ NumSinkCommons++;
+ Changed = true;
+ }
+ return Changed;
+}
+
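A source-level analogue of the sinking transform implemented above, purely for illustration; the pass operates on IR, and the ternary below stands in for the PHI node it creates for the differing operands:

#include <cstdio>

// Both arms end with the same multiply; only one operand differs.
int before(bool c, int a, int b) {
  int x;
  if (c)
    x = a * 5; // common instruction, operand a
  else
    x = b * 5; // common instruction, operand b
  return x;    // join block (BBEnd)
}

// After sinking, a single copy of the multiply lives in the join block and
// the differing operand is merged first (the PHI node in IR terms).
int after(bool c, int a, int b) {
  int t = c ? a : b;
  return t * 5;
}

int main() {
  printf("%d %d\n", before(false, 3, 4), after(false, 3, 4)); // 20 20
  return 0;
}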
/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1
/// and an BB2 and the only successor of BB1 is BB2, hoist simple code
/// (for now, restricted to a single instruction that's side effect free) from
@@ -1056,7 +1380,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
// Do not hoist the instruction if any of its operands are defined but not
// used in this BB. The transformation will prevent the operand from
// being sunk into the use block.
- for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
+ for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
i != e; ++i) {
Instruction *OpI = dyn_cast<Instruction>(*i);
if (OpI && OpI->getParent() == BIParent &&
@@ -1112,7 +1436,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
// as well.
if (PHIs.empty())
return false;
-
+
// If we get here, we can hoist the instruction and if-convert.
DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";);
@@ -1162,13 +1486,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
unsigned Size = 0;
-
+
for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
if (isa<DbgInfoIntrinsic>(BBI))
continue;
if (Size > 10) return false; // Don't clone large BB's.
++Size;
-
+
// We can only support instructions that do not define values that are
// live outside of the current basic block.
for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
@@ -1176,7 +1500,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
Instruction *U = cast<Instruction>(*UI);
if (U->getParent() != BB || isa<PHINode>(U)) return false;
}
-
+
// Looks ok, continue checking.
}
@@ -1187,38 +1511,38 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// that is defined in the same block as the branch and if any PHI entries are
/// constants, thread edges corresponding to that entry to be branches to their
/// ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
// outside of the block.
if (!PN || PN->getParent() != BB || !PN->hasOneUse())
return false;
-
+
// Degenerate case of a single entry PHI.
if (PN->getNumIncomingValues() == 1) {
FoldSingleEntryPHINodes(PN->getParent());
- return true;
+ return true;
}
// Now we know that this block has multiple preds and two succs.
if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
-
+
// Okay, this is a simple enough basic block. See if any phi values are
// constants.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue;
-
+
// Okay, we now know that all edges from PredBB should be revectored to
// branch to RealDest.
BasicBlock *PredBB = PN->getIncomingBlock(i);
BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
-
+
if (RealDest == BB) continue; // Skip self loops.
// Skip if the predecessor's terminator is an indirect branch.
if (isa<IndirectBrInst>(PredBB->getTerminator())) continue;
-
+
// The dest block might have PHI nodes, other predecessors and other
// difficult cases. Instead of being smart about this, just insert a new
// block that jumps to the destination block, effectively splitting
@@ -1227,7 +1551,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
RealDest->getName()+".critedge",
RealDest->getParent(), RealDest);
BranchInst::Create(RealDest, EdgeBB);
-
+
// Update PHI nodes.
AddPredecessorToBlock(RealDest, EdgeBB, BB);
@@ -1244,7 +1568,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
// Clone the instruction.
Instruction *N = BBI->clone();
if (BBI->hasName()) N->setName(BBI->getName()+".c");
-
+
// Update operands due to translation.
for (User::op_iterator i = N->op_begin(), e = N->op_end();
i != e; ++i) {
@@ -1252,7 +1576,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
if (PI != TranslateMap.end())
*i = PI->second;
}
-
+
// Check for trivial simplification.
if (Value *V = SimplifyInstruction(N, TD)) {
TranslateMap[BBI] = V;
@@ -1283,7 +1607,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
/// PHI node, see if we can eliminate it.
-static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
+static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
@@ -1297,7 +1621,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
// Don't bother if the branch will be constant folded trivially.
isa<ConstantInt>(IfCond))
return false;
-
+
// Okay, we found that we can merge this two-entry phi node into a select.
// Doing so would require us to fold *all* two entry phi nodes in this block.
// At some point this becomes non-profitable (particularly if the target
@@ -1307,14 +1631,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
if (NumPhis > 2)
return false;
-
+
// Loop over the PHI's seeing if we can promote them all to select
// instructions. While we are at it, keep track of the instructions
// that need to be moved to the dominating block.
SmallPtrSet<Instruction*, 4> AggressiveInsts;
unsigned MaxCostVal0 = PHINodeFoldingThreshold,
MaxCostVal1 = PHINodeFoldingThreshold;
-
+
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
if (Value *V = SimplifyInstruction(PN, TD)) {
@@ -1322,19 +1646,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
PN->eraseFromParent();
continue;
}
-
+
if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
MaxCostVal0) ||
!DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
MaxCostVal1))
return false;
}
-
+
// If we folded the first phi, PN dangles at this point. Refresh it. If
// we ran out of PHIs then we simplified them all.
PN = dyn_cast<PHINode>(BB->begin());
if (PN == 0) return true;
-
+
// Don't fold i1 branches on PHIs which contain binary operators. These can
// often be turned into switches and other things.
if (PN->getType()->isIntegerTy(1) &&
@@ -1342,7 +1666,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
isa<BinaryOperator>(PN->getIncomingValue(1)) ||
isa<BinaryOperator>(IfCond)))
return false;
-
+
  // If all PHI nodes are promotable, check to make sure that all
// instructions in the predecessor blocks can be promoted as well. If
// not, we won't be able to get rid of the control flow, so it's not
@@ -1362,7 +1686,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
return false;
}
}
-
+
if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
IfBlock2 = 0;
} else {
@@ -1375,15 +1699,15 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
return false;
}
}
-
+
DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
<< IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
-
+
// If we can still promote the PHI nodes after this gauntlet of tests,
// do all of the PHI's now.
Instruction *InsertPt = DomBlock->getTerminator();
IRBuilder<true, NoFolder> Builder(InsertPt);
-
+
// Move all 'aggressive' instructions, which are defined in the
// conditional parts of the if's up to the dominating block.
if (IfBlock1)
@@ -1394,19 +1718,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
DomBlock->getInstList().splice(InsertPt,
IfBlock2->getInstList(), IfBlock2->begin(),
IfBlock2->getTerminator());
-
+
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
// Change the PHI node into a select instruction.
Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
-
- SelectInst *NV =
+
+ SelectInst *NV =
cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, ""));
PN->replaceAllUsesWith(NV);
NV->takeName(PN);
PN->eraseFromParent();
}
-
+
// At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
// has been flattened. Change DomBlock to jump directly to our new block to
// avoid other simplifycfg's kicking in on the diamond.
@@ -1420,14 +1744,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
/// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes
/// to two returning blocks, try to merge them together into one return,
/// introducing a select if the return values disagree.
-static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
+static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
IRBuilder<> &Builder) {
assert(BI->isConditional() && "Must be a conditional branch");
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
-
+
// Check to ensure both blocks are empty (just a return) or optionally empty
// with PHI nodes. If there are other instructions, merging would cause extra
// computation on one path or the other.
@@ -1447,12 +1771,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
EraseTerminatorInstAndDCECond(BI);
return true;
}
-
+
// Otherwise, figure out what the true and false return values are
// so we can insert a new select instruction.
Value *TrueValue = TrueRet->getReturnValue();
Value *FalseValue = FalseRet->getReturnValue();
-
+
// Unwrap any PHI nodes in the return blocks.
if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
if (TVPN->getParent() == TrueSucc)
@@ -1460,7 +1784,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
if (FVPN->getParent() == FalseSucc)
FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
-
+
// In order for this transformation to be safe, we must be able to
// unconditionally execute both operands to the return. This is
// normally the case, but we could have a potentially-trapping
@@ -1472,12 +1796,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
if (FCV->canTrap())
return false;
-
+
// Okay, we collected all the mapped values and checked them for sanity, and
// defined to really do this transformation. First, update the CFG.
TrueSucc->removePredecessor(BI->getParent());
FalseSucc->removePredecessor(BI->getParent());
-
+
// Insert select instructions where needed.
Value *BrCond = BI->getCondition();
if (TrueValue) {
@@ -1491,15 +1815,15 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
}
}
- Value *RI = !TrueValue ?
+ Value *RI = !TrueValue ?
Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
(void) RI;
-
+
DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
<< "\n " << *BI << "NewRet = " << *RI
<< "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);
-
+
EraseTerminatorInstAndDCECond(BI);
return true;
@@ -1510,7 +1834,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
/// parameters and return true, or returns false if no or invalid metadata was
/// found.
static bool ExtractBranchMetadata(BranchInst *BI,
- APInt &ProbTrue, APInt &ProbFalse) {
+ uint64_t &ProbTrue, uint64_t &ProbFalse) {
assert(BI->isConditional() &&
"Looking for probabilities on unconditional branch?");
MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
@@ -1518,35 +1842,11 @@ static bool ExtractBranchMetadata(BranchInst *BI,
ConstantInt *CITrue = dyn_cast<ConstantInt>(ProfileData->getOperand(1));
ConstantInt *CIFalse = dyn_cast<ConstantInt>(ProfileData->getOperand(2));
if (!CITrue || !CIFalse) return false;
- ProbTrue = CITrue->getValue();
- ProbFalse = CIFalse->getValue();
- assert(ProbTrue.getBitWidth() == 32 && ProbFalse.getBitWidth() == 32 &&
- "Branch probability metadata must be 32-bit integers");
+ ProbTrue = CITrue->getValue().getZExtValue();
+ ProbFalse = CIFalse->getValue().getZExtValue();
return true;
}
-/// MultiplyAndLosePrecision - Multiplies A and B, then returns the result. In
-/// the event of overflow, logically-shifts all four inputs right until the
-/// multiply fits.
-static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D,
- unsigned &BitsLost) {
- BitsLost = 0;
- bool Overflow = false;
- APInt Result = A.umul_ov(B, Overflow);
- if (Overflow) {
- APInt MaxB = APInt::getMaxValue(A.getBitWidth()).udiv(A);
- do {
- B = B.lshr(1);
- ++BitsLost;
- } while (B.ugt(MaxB));
- A = A.lshr(BitsLost);
- C = C.lshr(BitsLost);
- D = D.lshr(BitsLost);
- Result = A * B;
- }
- return Result;
-}
-
/// checkCSEInPredecessor - Return true if the given instruction is available
/// in its predecessor block. If yes, the instruction will be removed.
///
@@ -1600,7 +1900,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
if (Cond == 0)
return false;
}
-
+
if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
Cond->getParent() != BB || !Cond->hasOneUse())
return false;
@@ -1623,7 +1923,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
isSafeToSpeculativelyExecute(FrontIt)) {
BonusInst = &*FrontIt;
++FrontIt;
-
+
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
}
@@ -1631,13 +1931,13 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Only a single bonus inst is allowed.
if (&*FrontIt != Cond)
return false;
-
+
// Make sure the instruction after the condition is the cond branch.
BasicBlock::iterator CondIt = Cond; ++CondIt;
  // Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
-
+
if (&*CondIt != BI)
return false;
@@ -1649,7 +1949,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
if (CE->canTrap())
return false;
-
+
// Finally, don't infinitely unroll conditional loops.
BasicBlock *TrueDest = BI->getSuccessor(0);
BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0;
@@ -1659,22 +1959,22 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *PredBlock = *PI;
BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
-
+
// Check that we have two conditional branches. If there is a PHI node in
// the common successor, verify that the same value flows in from both
// blocks.
SmallVector<PHINode*, 4> PHIs;
if (PBI == 0 || PBI->isUnconditional() ||
- (BI->isConditional() &&
+ (BI->isConditional() &&
!SafeToMergeTerminators(BI, PBI)) ||
(!BI->isConditional() &&
!isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs)))
continue;
-
+
// Determine if the two branches share a common destination.
- Instruction::BinaryOps Opc;
+ Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd;
bool InvertPredCond = false;
-
+
if (BI->isConditional()) {
if (PBI->getSuccessor(0) == TrueDest)
Opc = Instruction::Or;
@@ -1693,7 +1993,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Ensure that any values used in the bonus instruction are also used
// by the terminator of the predecessor. This means that those values
- // must already have been resolved, so we won't be inhibiting the
+ // must already have been resolved, so we won't be inhibiting the
// out-of-order core by speculating them earlier.
if (BonusInst) {
// Collect the values used by the bonus inst
@@ -1707,47 +2007,47 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
SmallVector<std::pair<Value*, unsigned>, 4> Worklist;
Worklist.push_back(std::make_pair(PBI->getOperand(0), 0));
-
+
// Walk up to four levels back up the use-def chain of the predecessor's
// terminator to see if all those values were used. The choice of four
// levels is arbitrary, to provide a compile-time-cost bound.
while (!Worklist.empty()) {
std::pair<Value*, unsigned> Pair = Worklist.back();
Worklist.pop_back();
-
+
if (Pair.second >= 4) continue;
UsedValues.erase(Pair.first);
if (UsedValues.empty()) break;
-
+
if (Instruction *I = dyn_cast<Instruction>(Pair.first)) {
for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
- }
+ }
}
-
+
if (!UsedValues.empty()) return false;
}
DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
- IRBuilder<> Builder(PBI);
+ IRBuilder<> Builder(PBI);
// If we need to invert the condition in the pred block to match, do so now.
if (InvertPredCond) {
Value *NewCond = PBI->getCondition();
-
+
if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
CmpInst *CI = cast<CmpInst>(NewCond);
CI->setPredicate(CI->getInversePredicate());
} else {
- NewCond = Builder.CreateNot(NewCond,
+ NewCond = Builder.CreateNot(NewCond,
PBI->getCondition()->getName()+".not");
}
-
+
PBI->setCondition(NewCond);
PBI->swapSuccessors();
}
-
+
// If we have a bonus inst, clone it into the predecessor block.
Instruction *NewBonus = 0;
if (BonusInst) {
@@ -1756,7 +2056,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
NewBonus->takeName(BonusInst);
BonusInst->setName(BonusInst->getName()+".old");
}
-
+
// Clone Cond into the predecessor basic block, and or/and the
// two conditions together.
Instruction *New = Cond->clone();
@@ -1764,21 +2064,60 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
PredBlock->getInstList().insert(PBI, New);
New->takeName(Cond);
Cond->setName(New->getName()+".old");
-
+
if (BI->isConditional()) {
- Instruction *NewCond =
+ Instruction *NewCond =
cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(),
New, "or.cond"));
PBI->setCondition(NewCond);
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
+ PredFalseWeight);
+ bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
+ SuccFalseWeight);
+ SmallVector<uint64_t, 8> NewWeights;
+
if (PBI->getSuccessor(0) == BB) {
+ if (PredHasWeights && SuccHasWeights) {
+ // PBI: br i1 %x, BB, FalseDest
+ // BI: br i1 %y, TrueDest, FalseDest
+ //TrueWeight is TrueWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
+ //FalseWeight is FalseWeight for PBI * TotalWeight for BI +
+ // TrueWeight for PBI * FalseWeight for BI.
+ // We assume that total weights of a BranchInst can fit into 32 bits.
+ // Therefore, we will not have overflow using 64-bit arithmetic.
+ NewWeights.push_back(PredFalseWeight * (SuccFalseWeight +
+ SuccTrueWeight) + PredTrueWeight * SuccFalseWeight);
+ }
AddPredecessorToBlock(TrueDest, PredBlock, BB);
PBI->setSuccessor(0, TrueDest);
}
if (PBI->getSuccessor(1) == BB) {
+ if (PredHasWeights && SuccHasWeights) {
+ // PBI: br i1 %x, TrueDest, BB
+ // BI: br i1 %y, TrueDest, FalseDest
+ //TrueWeight is TrueWeight for PBI * TotalWeight for BI +
+ // FalseWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * (SuccFalseWeight +
+ SuccTrueWeight) + PredFalseWeight * SuccTrueWeight);
+ //FalseWeight is FalseWeight for PBI * FalseWeight for BI.
+ NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
+ }
AddPredecessorToBlock(FalseDest, PredBlock, BB);
PBI->setSuccessor(1, FalseDest);
}
+ if (NewWeights.size() == 2) {
+      // Halve the weights if any of them cannot fit in a uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),NewWeights.end());
+ PBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BI->getContext()).
+ createBranchWeights(MDWeights));
+ } else
+ PBI->setMetadata(LLVMContext::MD_prof, NULL);
} else {
// Update PHI nodes in the common successors.
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
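A worked instance of the weight products used in the PBI->getSuccessor(0) == BB case above, with invented weights:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t PredTrue = 2, PredFalse = 1; // weights on PBI (br %x)
  uint64_t SuccTrue = 3, SuccFalse = 5; // weights on BI  (br %y)

  // Reaching TrueDest requires both branches to go the "true" way.
  uint64_t NewTrue = PredTrue * SuccTrue;                    // 2*3 = 6
  // FalseDest is reached if PBI goes false outright, or PBI goes true and
  // BI then goes false.
  uint64_t NewFalse = PredFalse * (SuccTrue + SuccFalse)     // 1*8
                    + PredTrue * SuccFalse;                  // + 2*5 = 18

  // The totals stay consistent: 6 + 18 == (2+1) * (3+5) == 24.
  printf("%llu %llu\n", (unsigned long long)NewTrue,
         (unsigned long long)NewFalse);                      // 6 18
  return 0;
}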
@@ -1806,7 +2145,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C)
// PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
// is false: PBI_Cond and BI_Value
- MergedCond =
+ MergedCond =
cast<Instruction>(Builder.CreateBinOp(Instruction::And,
PBI->getCondition(), New,
"and.cond"));
@@ -1814,7 +2153,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
Instruction *NotCond =
cast<Instruction>(Builder.CreateNot(PBI->getCondition(),
"not.cond"));
- MergedCond =
+ MergedCond =
cast<Instruction>(Builder.CreateBinOp(Instruction::Or,
NotCond, MergedCond,
"or.cond"));
@@ -1833,95 +2172,11 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// TODO: If BB is reachable from all paths through PredBlock, then we
// could replace PBI's branch probabilities with BI's.
- // Merge probability data into PredBlock's branch.
- APInt A, B, C, D;
- if (PBI->isConditional() && BI->isConditional() &&
- ExtractBranchMetadata(PBI, C, D) && ExtractBranchMetadata(BI, A, B)) {
- // Given IR which does:
- // bbA:
- // br i1 %x, label %bbB, label %bbC
- // bbB:
- // br i1 %y, label %bbD, label %bbC
- // Let's call the probability that we take the edge from %bbA to %bbB
- // 'a', from %bbA to %bbC, 'b', from %bbB to %bbD 'c' and from %bbB to
- // %bbC probability 'd'.
- //
- // We transform the IR into:
- // bbA:
- // br i1 %z, label %bbD, label %bbC
- // where the probability of going to %bbD is (a*c) and going to bbC is
- // (b+a*d).
- //
- // Probabilities aren't stored as ratios directly. Using branch weights,
- // we get:
- // (a*c)% = A*C, (b+(a*d))% = A*D+B*C+B*D.
-
- // In the event of overflow, we want to drop the LSB of the input
- // probabilities.
- unsigned BitsLost;
-
- // Ignore overflow result on ProbTrue.
- APInt ProbTrue = MultiplyAndLosePrecision(A, C, B, D, BitsLost);
-
- APInt Tmp1 = MultiplyAndLosePrecision(B, D, A, C, BitsLost);
- if (BitsLost) {
- ProbTrue = ProbTrue.lshr(BitsLost*2);
- }
-
- APInt Tmp2 = MultiplyAndLosePrecision(A, D, C, B, BitsLost);
- if (BitsLost) {
- ProbTrue = ProbTrue.lshr(BitsLost*2);
- Tmp1 = Tmp1.lshr(BitsLost*2);
- }
-
- APInt Tmp3 = MultiplyAndLosePrecision(B, C, A, D, BitsLost);
- if (BitsLost) {
- ProbTrue = ProbTrue.lshr(BitsLost*2);
- Tmp1 = Tmp1.lshr(BitsLost*2);
- Tmp2 = Tmp2.lshr(BitsLost*2);
- }
-
- bool Overflow1 = false, Overflow2 = false;
- APInt Tmp4 = Tmp2.uadd_ov(Tmp3, Overflow1);
- APInt ProbFalse = Tmp4.uadd_ov(Tmp1, Overflow2);
-
- if (Overflow1 || Overflow2) {
- ProbTrue = ProbTrue.lshr(1);
- Tmp1 = Tmp1.lshr(1);
- Tmp2 = Tmp2.lshr(1);
- Tmp3 = Tmp3.lshr(1);
- Tmp4 = Tmp2 + Tmp3;
- ProbFalse = Tmp4 + Tmp1;
- }
-
- // The sum of branch weights must fit in 32-bits.
- if (ProbTrue.isNegative() && ProbFalse.isNegative()) {
- ProbTrue = ProbTrue.lshr(1);
- ProbFalse = ProbFalse.lshr(1);
- }
-
- if (ProbTrue != ProbFalse) {
- // Normalize the result.
- APInt GCD = APIntOps::GreatestCommonDivisor(ProbTrue, ProbFalse);
- ProbTrue = ProbTrue.udiv(GCD);
- ProbFalse = ProbFalse.udiv(GCD);
-
- MDBuilder MDB(BI->getContext());
- MDNode *N = MDB.createBranchWeights(ProbTrue.getZExtValue(),
- ProbFalse.getZExtValue());
- PBI->setMetadata(LLVMContext::MD_prof, N);
- } else {
- PBI->setMetadata(LLVMContext::MD_prof, NULL);
- }
- } else {
- PBI->setMetadata(LLVMContext::MD_prof, NULL);
- }
-
// Copy any debug value intrinsics into the end of PredBlock.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (isa<DbgInfoIntrinsic>(*I))
I->clone()->insertBefore(PBI);
-
+
return true;
}
return false;
@@ -1936,7 +2191,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
BasicBlock *BB = BI->getParent();
// If this block ends with a branch instruction, and if there is a
- // predecessor that ends on a branch of the same condition, make
+ // predecessor that ends on a branch of the same condition, make
// this conditional branch redundant.
if (PBI->getCondition() == BI->getCondition() &&
PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
@@ -1945,11 +2200,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
if (BB->getSinglePredecessor()) {
// Turn this into a branch on constant.
bool CondIsTrue = PBI->getSuccessor(0) == BB;
- BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
CondIsTrue));
return true; // Nuke the branch on constant.
}
-
+
// Otherwise, if there are multiple predecessors, insert a PHI that merges
// in the constant and simplify the block result. Subsequent passes of
// simplifycfg will thread the block.
@@ -1969,18 +2224,18 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
PBI->getCondition() == BI->getCondition() &&
PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
bool CondIsTrue = PBI->getSuccessor(0) == BB;
- NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
CondIsTrue), P);
} else {
NewPN->addIncoming(BI->getCondition(), P);
}
}
-
+
BI->setCondition(NewPN);
return true;
}
}
-
+
// If this is a conditional branch in an empty block, and if any
// predecessors is a conditional branch to one of our destinations,
// fold the conditions into logical ops and one cond br.
@@ -1991,11 +2246,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
if (&*BBI != BI)
return false;
-
+
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
if (CE->canTrap())
return false;
-
+
int PBIOp, BIOp;
if (PBI->getSuccessor(0) == BI->getSuccessor(0))
PBIOp = BIOp = 0;
@@ -2007,31 +2262,31 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
PBIOp = BIOp = 1;
else
return false;
-
+
// Check to make sure that the other destination of this branch
// isn't BB itself. If so, this is an infinite loop that will
// keep getting unwound.
if (PBI->getSuccessor(PBIOp) == BB)
return false;
-
- // Do not perform this transformation if it would require
+
+ // Do not perform this transformation if it would require
// insertion of a large number of select instructions. For targets
// without predication/cmovs, this is a big pessimization.
BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
-
+
unsigned NumPhis = 0;
for (BasicBlock::iterator II = CommonDest->begin();
isa<PHINode>(II); ++II, ++NumPhis)
if (NumPhis > 2) // Disable this xform.
return false;
-
+
// Finally, if everything is ok, fold the branches to logical ops.
BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
-
+
DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
<< "AND: " << *BI->getParent());
-
-
+
+
// If OtherDest *is* BB, then BB is a basic block with a single conditional
// branch in it, where one edge (OtherDest) goes back to itself but the other
// exits. We don't *know* that the program avoids the infinite loop
@@ -2046,13 +2301,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
"infloop", BB->getParent());
BranchInst::Create(InfLoopBlock, InfLoopBlock);
OtherDest = InfLoopBlock;
- }
-
+ }
+
DEBUG(dbgs() << *PBI->getParent()->getParent());
// BI may have other predecessors. Because of this, we leave
// it alone, but modify PBI.
-
+
// Make sure we get to CommonDest on True&True directions.
Value *PBICond = PBI->getCondition();
IRBuilder<true, NoFolder> Builder(PBI);
@@ -2065,16 +2320,43 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// Merge the conditions.
Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
-
+
// Modify PBI to branch on the new condition to the new dests.
PBI->setCondition(Cond);
PBI->setSuccessor(0, CommonDest);
PBI->setSuccessor(1, OtherDest);
-
+
+ // Update branch weight for PBI.
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
+ PredFalseWeight);
+ bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
+ SuccFalseWeight);
+ if (PredHasWeights && SuccHasWeights) {
+ uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
+    uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
+ uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
+ uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
+ // The weight to CommonDest should be PredCommon * SuccTotal +
+ // PredOther * SuccCommon.
+ // The weight to OtherDest should be PredOther * SuccOther.
+ SmallVector<uint64_t, 2> NewWeights;
+ NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) +
+ PredOther * SuccCommon);
+ NewWeights.push_back(PredOther * SuccOther);
+    // Halve the weights if any of them cannot fit in a uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 2> MDWeights(NewWeights.begin(),NewWeights.end());
+ PBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BI->getContext()).
+ createBranchWeights(MDWeights));
+ }
+
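A quick consistency check of the CommonDest/OtherDest weights computed above, again with invented numbers: only executions where both branches take the "other" direction contribute to OtherDest, and the merged weights partition the same total.

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t PredCommon = 3, PredOther = 1; // PBI weights to CommonDest/OtherDest
  uint64_t SuccCommon = 2, SuccOther = 5; // BI  weights to CommonDest/OtherDest

  uint64_t ToCommon = PredCommon * (SuccCommon + SuccOther)  // 3*7
                    + PredOther * SuccCommon;                // + 1*2 = 23
  uint64_t ToOther  = PredOther * SuccOther;                 // 1*5 = 5

  // 23 + 5 == (3+1) * (2+5): nothing is lost or double-counted.
  printf("%llu %llu\n", (unsigned long long)ToCommon,
         (unsigned long long)ToOther);                       // 23 5
  return 0;
}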
// OtherDest may have phi nodes. If so, add an entry from PBI's
// block that are identical to the entries for BI's block.
AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
-
+
// We know that the CommonDest already had an edge from PBI to
// it. If it has PHIs though, the PHIs may have different
// entries for BB and PBI's BB. If so, insert a select to make
@@ -2092,10 +2374,10 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
PN->setIncomingValue(PBBIdx, NV);
}
}
-
+
DEBUG(dbgs() << "INTO: " << *PBI->getParent());
DEBUG(dbgs() << *PBI->getParent()->getParent());
-
+
// This basic block is probably dead. We know it has at least
// one fewer predecessor.
return true;
@@ -2107,7 +2389,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
- BasicBlock *TrueBB, BasicBlock *FalseBB){
+ BasicBlock *TrueBB, BasicBlock *FalseBB,
+ uint32_t TrueWeight,
+ uint32_t FalseWeight){
// Remove any superfluous successor edges from the CFG.
// First, figure out which successors to preserve.
// If TrueBB and FalseBB are equal, only try to preserve one copy of that
@@ -2136,10 +2420,15 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
// We were only looking for one successor, and it was present.
// Create an unconditional branch to it.
Builder.CreateBr(TrueBB);
- else
+ else {
// We found both of the successors we were looking for.
// Create a conditional branch sharing the condition of the select.
- Builder.CreateCondBr(Cond, TrueBB, FalseBB);
+ BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
+ if (TrueWeight != FalseWeight)
+ NewBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(OldTerm->getContext()).
+ createBranchWeights(TrueWeight, FalseWeight));
+ }
} else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
// Neither of the selected blocks were successors, so this
// terminator must be unreachable.
@@ -2176,8 +2465,23 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor();
BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor();
+ // Get weight for TrueBB and FalseBB.
+ uint32_t TrueWeight = 0, FalseWeight = 0;
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ TrueWeight = (uint32_t)Weights[SI->findCaseValue(TrueVal).
+ getSuccessorIndex()];
+ FalseWeight = (uint32_t)Weights[SI->findCaseValue(FalseVal).
+ getSuccessorIndex()];
+ }
+ }
+
// Perform the actual simplification.
- return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB);
+ return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB,
+ TrueWeight, FalseWeight);
}
// SimplifyIndirectBrOnSelect - Replaces
@@ -2197,7 +2501,8 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
BasicBlock *FalseBB = FBA->getBasicBlock();
// Perform the actual simplification.
- return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB);
+ return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
+ 0, 0);
}
/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp
@@ -2214,11 +2519,11 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
/// br label %end
/// end:
/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
-///
+///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
- const TargetData *TD,
+ const DataLayout *TD,
IRBuilder<> &Builder) {
BasicBlock *BB = ICI->getParent();
@@ -2228,17 +2533,17 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
Value *V = ICI->getOperand(0);
ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
-
+
// The pattern we're looking for is where our only predecessor is a switch on
// 'V' and this block is the default case for the switch. In this case we can
// fold the compared value into the switch to simplify things.
BasicBlock *Pred = BB->getSinglePredecessor();
if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false;
-
+
SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
if (SI->getCondition() != V)
return false;
-
+
// If BB is reachable on a non-default case, then we simply know the value of
// V in this block. Substitute it and constant fold the icmp instruction
// away.
@@ -2246,7 +2551,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
ConstantInt *VVal = SI->findCaseDest(BB);
assert(VVal && "Should have a unique destination value");
ICI->setOperand(0, VVal);
-
+
if (Value *V = SimplifyInstruction(ICI, TD)) {
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
@@ -2254,7 +2559,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
// BB is now empty, so it is likely to simplify away.
return SimplifyCFG(BB) | true;
}
-
+
// Ok, the block is reachable from the default dest. If the constant we're
// comparing exists in one of the other edges, then we can constant fold ICI
// and zap it.
@@ -2264,13 +2569,13 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
V = ConstantInt::getFalse(BB->getContext());
else
V = ConstantInt::getTrue(BB->getContext());
-
+
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
return SimplifyCFG(BB) | true;
}
-
+
// The use of the icmp has to be in the 'end' block, by the only PHI node in
// the block.
BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
@@ -2296,8 +2601,23 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
// the switch to the merge point on the compared value.
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge",
BB->getParent(), BB);
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ // Split weight for default case to case for "Cst".
+ Weights[0] = (Weights[0]+1) >> 1;
+ Weights.push_back(Weights[0]);
+
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getContext()).
+ createBranchWeights(MDWeights));
+ }
+ }
SI->addCase(Cst, NewBB);
-
+
// NewBB branches to the phi block, add the uncond branch and the phi entry.
Builder.SetInsertPoint(NewBB);
Builder.SetCurrentDebugLocation(SI->getDebugLoc());
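To make the default-weight split a few lines above concrete: the default edge keeps half of its old weight (rounded up) and the newly added case receives the same share. A minimal standalone sketch on hypothetical numbers (plain C++, not LLVM API):

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical switch weights: slot 0 is the default edge, the rest are cases.
  std::vector<uint64_t> Weights = {7, 10, 20};

  // Same arithmetic as above: halve the default weight, rounding up, and give
  // the new case an equal share.
  Weights[0] = (Weights[0] + 1) >> 1;
  Weights.push_back(Weights[0]);

  for (uint64_t W : Weights)
    std::cout << W << ' ';   // prints: 4 10 20 4
  std::cout << '\n';
}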
@@ -2309,12 +2629,12 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
-static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
IRBuilder<> &Builder) {
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (Cond == 0) return false;
-
-
+
+
// Change br (X == 0 | X == 1), T, F into a switch instruction.
// If this is a bunch of seteq's or'd together, or if it's a bunch of
// 'setne's and'ed together, collect them.
@@ -2323,7 +2643,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
bool TrueWhenEqual = true;
Value *ExtraCase = 0;
unsigned UsedICmps = 0;
-
+
if (Cond->getOpcode() == Instruction::Or) {
CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
UsedICmps);
@@ -2332,7 +2652,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
UsedICmps);
TrueWhenEqual = false;
}
-
+
// If we didn't have a multiply compared value, fail.
if (CompVal == 0) return false;
@@ -2344,21 +2664,24 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
// instruction can't handle, remove them now.
array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
-
+
// If Extra was used, we require at least two switch values to do the
// transformation. A switch with one value is just an cond branch.
if (ExtraCase && Values.size() < 2) return false;
-
+
+ // TODO: Preserve branch weight metadata, similarly to how
+ // FoldValueComparisonIntoPredecessors preserves it.
+
// Figure out which block is which destination.
BasicBlock *DefaultBB = BI->getSuccessor(1);
BasicBlock *EdgeBB = BI->getSuccessor(0);
if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
-
+
BasicBlock *BB = BI->getParent();
-
+
DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
<< " cases into SWITCH. BB is:\n" << *BB);
-
+
// If there are any extra values that couldn't be folded into the switch
// then we evaluate them with an explicit branch first. Split the block
// right before the condbr to handle it.
@@ -2372,13 +2695,13 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
else
Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
-
+
OldTI->eraseFromParent();
-
+
// If there are PHI nodes in EdgeBB, then we need to add a new entry to them
// for the edge we just added.
AddPredecessorToBlock(EdgeBB, BB, NewBB);
-
+
DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
<< "\nEXTRABB = " << *BB);
BB = NewBB;
@@ -2387,19 +2710,19 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
Builder.SetInsertPoint(BI);
// Convert pointer to int before we switch.
if (CompVal->getType()->isPointerTy()) {
- assert(TD && "Cannot switch on pointer without TargetData");
+ assert(TD && "Cannot switch on pointer without DataLayout");
CompVal = Builder.CreatePtrToInt(CompVal,
TD->getIntPtrType(CompVal->getContext()),
"magicptr");
}
-
+
// Create the new switch instruction now.
SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
// Add all of the 'cases' to the switch instruction.
for (unsigned i = 0, e = Values.size(); i != e; ++i)
New->addCase(Values[i], EdgeBB);
-
+
// We added edges from PI to the EdgeBB. As such, if there were any
// PHI nodes in EdgeBB, they need entries to be added corresponding to
// the number of edges added.
@@ -2410,10 +2733,10 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
PN->addIncoming(InVal, BB);
}
-
+
// Erase the old branch instruction.
EraseTerminatorInstAndDCECond(BI);
-
+
DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
return true;
}
@@ -2467,7 +2790,7 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
BasicBlock *BB = RI->getParent();
if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
-
+
// Find predecessors that end with branches.
SmallVector<BasicBlock*, 8> UncondBranchPreds;
SmallVector<BranchInst*, 8> CondBranchPreds;
@@ -2481,7 +2804,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
CondBranchPreds.push_back(BI);
}
}
-
+
// If we found some, do the transformation!
if (!UncondBranchPreds.empty() && DupRet) {
while (!UncondBranchPreds.empty()) {
@@ -2490,21 +2813,21 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
<< "INTO UNCOND BRANCH PRED: " << *Pred);
(void)FoldReturnIntoUncondBranch(RI, BB, Pred);
}
-
+
// If we eliminated all predecessors of the block, delete the block now.
if (pred_begin(BB) == pred_end(BB))
// We know there are no successors, so just nuke the block.
BB->eraseFromParent();
-
+
return true;
}
-
+
// Check out all of the conditional branches going to this return
// instruction. If any of them just select between returns, change the
// branch itself into a select/return pair.
while (!CondBranchPreds.empty()) {
BranchInst *BI = CondBranchPreds.pop_back_val();
-
+
// Check to see if the non-BB successor is also a return block.
if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
@@ -2516,9 +2839,9 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
BasicBlock *BB = UI->getParent();
-
+
bool Changed = false;
-
+
// If there are any instructions immediately before the unreachable that can
// be removed, do so.
while (UI != BB->begin()) {
@@ -2558,11 +2881,11 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
BBI->eraseFromParent();
Changed = true;
}
-
+
// If the unreachable instruction is the first in the block, take a gander
// at all of the predecessors of this instruction, and simplify them.
if (&BB->front() != UI) return Changed;
-
+
SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
TerminatorInst *TI = Preds[i]->getTerminator();
@@ -2615,7 +2938,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
BasicBlock *MaxBlock = 0;
for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
- if (I->second.first > MaxPop ||
+ if (I->second.first > MaxPop ||
(I->second.first == MaxPop && MaxIndex > I->second.second)) {
MaxPop = I->second.first;
MaxIndex = I->second.second;
@@ -2627,13 +2950,13 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// edges to it.
SI->setDefaultDest(MaxBlock);
Changed = true;
-
+
// If MaxBlock has phinodes in it, remove MaxPop-1 entries from
// it.
if (isa<PHINode>(MaxBlock->begin()))
for (unsigned i = 0; i != MaxPop-1; ++i)
MaxBlock->removePredecessor(SI->getParent());
-
+
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i)
if (i.getCaseSuccessor() == MaxBlock) {
@@ -2648,7 +2971,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// place to note that the call does not throw though.
BranchInst *BI = Builder.CreateBr(II->getNormalDest());
II->removeFromParent(); // Take out of symbol table
-
+
// Insert the call now...
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
Builder.SetInsertPoint(BI);
@@ -2663,7 +2986,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
}
}
}
-
+
// If this block is now dead, remove it.
if (pred_begin(BB) == pred_end(BB) &&
BB != &BB->getParent()->getEntryBlock()) {
@@ -2706,9 +3029,28 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
if (!Offset->isNullValue())
Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
- Builder.CreateCondBr(
+ BranchInst *NewBI = Builder.CreateCondBr(
Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
+ // Update weight for the newly-created conditional branch.
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ // Combine all weights for the cases to be the true weight of NewBI.
+ // We assume that the sum of all weights for a Terminator can fit into 32
+ // bits.
+ uint32_t NewTrueWeight = 0;
+ for (unsigned I = 1, E = Weights.size(); I != E; ++I)
+ NewTrueWeight += (uint32_t)Weights[I];
+ NewBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getContext()).
+ createBranchWeights(NewTrueWeight,
+ (uint32_t)Weights[0]));
+ }
+ }
+
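The effect of the update just above, on hypothetical weights: every case weight is folded into the in-range ("true") edge of the new conditional branch, and the old default weight becomes the out-of-range edge. A rough standalone sketch:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical switch weights: slot 0 is the default, slots 1..3 are cases.
  std::vector<uint64_t> Weights = {3, 5, 7, 9};

  uint32_t NewTrueWeight = 0;                     // in-range edge
  for (size_t I = 1, E = Weights.size(); I != E; ++I)
    NewTrueWeight += (uint32_t)Weights[I];
  uint32_t NewFalseWeight = (uint32_t)Weights[0]; // out-of-range edge

  std::cout << NewTrueWeight << ' ' << NewFalseWeight << '\n';   // 21 3
}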
// Prune obsolete incoming values off the successor's PHI nodes.
for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin();
isa<PHINode>(BBI); ++BBI) {
@@ -2739,15 +3081,33 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
}
}
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeight = HasBranchWeights(SI);
+ if (HasWeight) {
+ GetBranchWeights(SI, Weights);
+ HasWeight = (Weights.size() == 1 + SI->getNumCases());
+ }
+
// Remove dead cases from the switch.
for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]);
assert(Case != SI->case_default() &&
"Case was not found. Probably mistake in DeadCases forming.");
+ if (HasWeight) {
+ std::swap(Weights[Case.getCaseIndex()+1], Weights.back());
+ Weights.pop_back();
+ }
+
// Prune unused values from PHI nodes.
Case.getCaseSuccessor()->removePredecessor(SI->getParent());
SI->removeCase(Case);
}
+ if (HasWeight) {
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getParent()->getContext()).
+ createBranchWeights(MDWeights));
+ }
return !DeadCases.empty();
}
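The swap-with-back bookkeeping above keeps the weight vector in sync with the case list as dead cases are dropped, matching how SwitchInst::removeCase backfills the removed slot with the last case. A small standalone sketch with hypothetical weights:

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

int main() {
  // Hypothetical weights: slot 0 is the default, slots 1..4 are cases A..D.
  std::vector<uint64_t> Weights = {2, 10, 20, 30, 40};

  // Remove case B (case index 1, weight slot 2): swap its weight with the
  // last slot and pop, so the remaining weights still line up with the cases.
  unsigned CaseIndex = 1;
  std::swap(Weights[CaseIndex + 1], Weights.back());
  Weights.pop_back();

  for (uint64_t W : Weights)
    std::cout << W << ' ';   // prints: 2 10 40 30
  std::cout << '\n';
}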
@@ -2823,33 +3183,512 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
return Changed;
}
-bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
- // If this switch is too complex to want to look at, ignore it.
- if (!isValueEqualityComparison(SI))
+/// ValidLookupTableConstant - Return true if the backend will be able to handle
+/// initializing an array of constants like C.
+static bool ValidLookupTableConstant(Constant *C) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return CE->isGEPWithNoNotionalOverIndexing();
+
+ return isa<ConstantFP>(C) ||
+ isa<ConstantInt>(C) ||
+ isa<ConstantPointerNull>(C) ||
+ isa<GlobalValue>(C) ||
+ isa<UndefValue>(C);
+}
+
+/// LookupConstant - If V is a Constant, return it. Otherwise, try to look up
+/// its constant value in ConstantPool, returning 0 if it's not there.
+static Constant *LookupConstant(Value *V,
+ const SmallDenseMap<Value*, Constant*>& ConstantPool) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C;
+ return ConstantPool.lookup(V);
+}
+
+/// ConstantFold - Try to fold instruction I into a constant. This works for
+/// simple instructions such as binary operations where both operands are
+/// constant or can be replaced by constants from the ConstantPool. Returns the
+/// resulting constant on success, 0 otherwise.
+static Constant *ConstantFold(Instruction *I,
+ const SmallDenseMap<Value*, Constant*>& ConstantPool) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ Constant *A = LookupConstant(BO->getOperand(0), ConstantPool);
+ if (!A)
+ return 0;
+ Constant *B = LookupConstant(BO->getOperand(1), ConstantPool);
+ if (!B)
+ return 0;
+ return ConstantExpr::get(BO->getOpcode(), A, B);
+ }
+
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ Constant *A = LookupConstant(I->getOperand(0), ConstantPool);
+ if (!A)
+ return 0;
+ Constant *B = LookupConstant(I->getOperand(1), ConstantPool);
+ if (!B)
+ return 0;
+ return ConstantExpr::getCompare(Cmp->getPredicate(), A, B);
+ }
+
+ if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
+ Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
+ if (!A)
+ return 0;
+ if (A->isAllOnesValue())
+ return LookupConstant(Select->getTrueValue(), ConstantPool);
+ if (A->isNullValue())
+ return LookupConstant(Select->getFalseValue(), ConstantPool);
+ return 0;
+ }
+
+ if (CastInst *Cast = dyn_cast<CastInst>(I)) {
+ Constant *A = LookupConstant(I->getOperand(0), ConstantPool);
+ if (!A)
+ return 0;
+ return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy());
+ }
+
+ return 0;
+}
+
+/// GetCaseResults - Try to determine the resulting constant values in phi nodes
+/// at the common destination basic block, *CommonDest, for one of the case
+/// destinations CaseDest corresponding to value CaseVal (0 for the default
+/// case), of a switch instruction SI.
+static bool GetCaseResults(SwitchInst *SI,
+ ConstantInt *CaseVal,
+ BasicBlock *CaseDest,
+ BasicBlock **CommonDest,
+ SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) {
+ // The block from which we enter the common destination.
+ BasicBlock *Pred = SI->getParent();
+
+ // If CaseDest is empty except for some side-effect free instructions through
+ // which we can constant-propagate the CaseVal, continue to its successor.
+ SmallDenseMap<Value*, Constant*> ConstantPool;
+ ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
+ for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E;
+ ++I) {
+ if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) {
+ // If the terminator is a simple branch, continue to the next block.
+ if (T->getNumSuccessors() != 1)
+ return false;
+ Pred = CaseDest;
+ CaseDest = T->getSuccessor(0);
+ } else if (isa<DbgInfoIntrinsic>(I)) {
+ // Skip debug intrinsic.
+ continue;
+ } else if (Constant *C = ConstantFold(I, ConstantPool)) {
+ // Instruction is side-effect free and constant.
+ ConstantPool.insert(std::make_pair(I, C));
+ } else {
+ break;
+ }
+ }
+
+ // If we did not have a CommonDest before, use the current one.
+ if (!*CommonDest)
+ *CommonDest = CaseDest;
+ // If the destination isn't the common one, abort.
+ if (CaseDest != *CommonDest)
+ return false;
+
+ // Get the values for this case from phi nodes in the destination block.
+ BasicBlock::iterator I = (*CommonDest)->begin();
+ while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+ int Idx = PHI->getBasicBlockIndex(Pred);
+ if (Idx == -1)
+ continue;
+
+ Constant *ConstVal = LookupConstant(PHI->getIncomingValue(Idx),
+ ConstantPool);
+ if (!ConstVal)
+ return false;
+
+ // Note: If the constant comes from constant-propagating the case value
+ // through the CaseDest basic block, it will be safe to remove the
+ // instructions in that block. They cannot be used (except in the phi nodes
+ // we visit) outside CaseDest, because that block does not dominate its
+ // successor. If it did, we would not be in this phi node.
+
+ // Be conservative about which kinds of constants we support.
+ if (!ValidLookupTableConstant(ConstVal))
+ return false;
+
+ Res.push_back(std::make_pair(PHI, ConstVal));
+ }
+
+ return true;
+}
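A hypothetical source-level shape that GetCaseResults can see through: once the switch condition is pinned to a case value, the side-effect-free computation in the case block folds to a constant per case, and that constant (rather than the original phi incoming value) is what ends up in the lookup table.

#include <iostream>

// Hypothetical example, not taken from the patch.
int Classify(int X) {
  int R;
  switch (X) {
  case 2:
  case 5:
    R = X * 3 + 1;   // with X known to be 2 or 5, this folds to 7 or 16
    break;
  default:
    R = 0;
    break;
  }
  return R;          // the common destination's "phi" of all case results
}

int main() {
  std::cout << Classify(2) << ' ' << Classify(5) << ' ' << Classify(9) << '\n'; // 7 16 0
}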
+
+namespace {
+ /// SwitchLookupTable - This class represents a lookup table that can be used
+ /// to replace a switch.
+ class SwitchLookupTable {
+ public:
+ /// SwitchLookupTable - Create a lookup table to use as a switch replacement
+ /// with the contents of Values, using DefaultValue to fill any holes in the
+ /// table.
+ SwitchLookupTable(Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ Constant *DefaultValue,
+ const DataLayout *TD);
+
+ /// BuildLookup - Build instructions with Builder to retrieve the value at
+ /// the position given by Index in the lookup table.
+ Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
+
+ /// WouldFitInRegister - Return true if a table with TableSize elements of
+ /// type ElementType would fit in a target-legal register.
+ static bool WouldFitInRegister(const DataLayout *TD,
+ uint64_t TableSize,
+ const Type *ElementType);
+
+ private:
+ // Depending on the contents of the table, it can be represented in
+ // different ways.
+ enum {
+ // For tables where each element contains the same value, we just have to
+ // store that single value and return it for each lookup.
+ SingleValueKind,
+
+ // For small tables with integer elements, we can pack them into a bitmap
+ // that fits into a target-legal register. Values are retrieved by
+ // shift and mask operations.
+ BitMapKind,
+
+ // The table is stored as an array of values. Values are retrieved by load
+ // instructions from the table.
+ ArrayKind
+ } Kind;
+
+ // For SingleValueKind, this is the single value.
+ Constant *SingleValue;
+
+ // For BitMapKind, this is the bitmap.
+ ConstantInt *BitMap;
+ IntegerType *BitMapElementTy;
+
+ // For ArrayKind, this is the array.
+ GlobalVariable *Array;
+ };
+}
+
+SwitchLookupTable::SwitchLookupTable(Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ Constant *DefaultValue,
+ const DataLayout *TD) {
+ assert(Values.size() && "Can't build lookup table without values!");
+ assert(TableSize >= Values.size() && "Can't fit values in table!");
+
+ // If all values in the table are equal, this is that value.
+ SingleValue = Values.begin()->second;
+
+ // Build up the table contents.
+ SmallVector<Constant*, 64> TableContents(TableSize);
+ for (size_t I = 0, E = Values.size(); I != E; ++I) {
+ ConstantInt *CaseVal = Values[I].first;
+ Constant *CaseRes = Values[I].second;
+ assert(CaseRes->getType() == DefaultValue->getType());
+
+ uint64_t Idx = (CaseVal->getValue() - Offset->getValue())
+ .getLimitedValue();
+ TableContents[Idx] = CaseRes;
+
+ if (CaseRes != SingleValue)
+ SingleValue = 0;
+ }
+
+ // Fill in any holes in the table with the default result.
+ if (Values.size() < TableSize) {
+ for (uint64_t I = 0; I < TableSize; ++I) {
+ if (!TableContents[I])
+ TableContents[I] = DefaultValue;
+ }
+
+ if (DefaultValue != SingleValue)
+ SingleValue = 0;
+ }
+
+ // If each element in the table contains the same value, we only need to store
+ // that single value.
+ if (SingleValue) {
+ Kind = SingleValueKind;
+ return;
+ }
+
+ // If the type is integer and the table fits in a register, build a bitmap.
+ if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) {
+ IntegerType *IT = cast<IntegerType>(DefaultValue->getType());
+ APInt TableInt(TableSize * IT->getBitWidth(), 0);
+ for (uint64_t I = TableSize; I > 0; --I) {
+ TableInt <<= IT->getBitWidth();
+ // Insert values into the bitmap. Undef values are set to zero.
+ if (!isa<UndefValue>(TableContents[I - 1])) {
+ ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
+ TableInt |= Val->getValue().zext(TableInt.getBitWidth());
+ }
+ }
+ BitMap = ConstantInt::get(M.getContext(), TableInt);
+ BitMapElementTy = IT;
+ Kind = BitMapKind;
+ ++NumBitMaps;
+ return;
+ }
+
+ // Store the table in an array.
+ ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize);
+ Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
+
+ Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
+ GlobalVariable::PrivateLinkage,
+ Initializer,
+ "switch.table");
+ Array->setUnnamedAddr(true);
+ Kind = ArrayKind;
+}
+
+Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
+ switch (Kind) {
+ case SingleValueKind:
+ return SingleValue;
+ case BitMapKind: {
+ // Type of the bitmap (e.g. i59).
+ IntegerType *MapTy = BitMap->getType();
+
+ // Cast Index to the same type as the bitmap.
+ // Note: The Index is <= the number of elements in the table, so
+ // truncating it to the width of the bitmask is safe.
+ Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
+
+ // Multiply the shift amount by the element width.
+ ShiftAmt = Builder.CreateMul(ShiftAmt,
+ ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
+ "switch.shiftamt");
+
+ // Shift down.
+ Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt,
+ "switch.downshift");
+ // Mask off.
+ return Builder.CreateTrunc(DownShifted, BitMapElementTy,
+ "switch.masked");
+ }
+ case ArrayKind: {
+ Value *GEPIndices[] = { Builder.getInt32(0), Index };
+ Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
+ "switch.gep");
+ return Builder.CreateLoad(GEP, "switch.load");
+ }
+ }
+ llvm_unreachable("Unknown lookup table kind!");
+}
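To make the BitMapKind path concrete: the whole table is packed into one wide integer with element 0 in the lowest bits, and BuildLookup shifts right by Index times the element width and truncates. A standalone sketch with hypothetical 8-bit elements in a 32-bit map:

#include <cstdint>
#include <iostream>

int main() {
  // Hypothetical table of four i8 results, packed into one 32-bit "bitmap".
  const uint8_t Table[4] = {5, 9, 0, 42};
  const unsigned ElemBits = 8;
  uint32_t BitMap = 0;

  // Pack from the highest index down, as the constructor above does.
  for (int I = 4; I > 0; --I) {
    BitMap <<= ElemBits;
    BitMap |= Table[I - 1];
  }

  // Lookup: shift down by Index * ElemBits, then truncate to the element width.
  for (uint32_t Index = 0; Index != 4; ++Index) {
    uint32_t ShiftAmt = Index * ElemBits;
    uint8_t Result = (uint8_t)(BitMap >> ShiftAmt); // truncation == masking
    std::cout << (unsigned)Result << ' ';           // prints: 5 9 0 42
  }
  std::cout << '\n';
}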
+
+bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
+ uint64_t TableSize,
+ const Type *ElementType) {
+ if (!TD)
+ return false;
+ const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
+ if (!IT)
+ return false;
+ // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
+ // are <= 15, we could try to narrow the type.
+
+ // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
+ if (TableSize >= UINT_MAX/IT->getBitWidth())
+ return false;
+ return TD->fitsInLegalInteger(TableSize * IT->getBitWidth());
+}
+
+/// ShouldBuildLookupTable - Determine whether a lookup table should be built
+/// for this switch, based on the number of cases, the size of the table, and the
+/// types of the results.
+static bool ShouldBuildLookupTable(SwitchInst *SI,
+ uint64_t TableSize,
+ const DataLayout *TD,
+ const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
+ // The table density should be at least 40%. This is the same criterion as for
+ // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Find the best cut-off.
+ if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
+ return false; // TableSize overflowed, or mul below might overflow.
+ if (SI->getNumCases() * 10 >= TableSize * 4)
+ return true;
+
+ // If each table would fit in a register, we should build it anyway.
+ for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(),
+ E = ResultTypes.end(); I != E; ++I) {
+ if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second))
+ return false;
+ }
+ return true;
+}
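To put numbers on the 40% density rule above (a standalone restatement of the same comparison, not the LLVM code): 5 cases spanning the values 0..11 give a 12-slot table and pass, while the same 5 cases spread over 0..127 fail unless every result type fits in a register.

#include <cstdint>
#include <iostream>

// Same density comparison as above, restated standalone.
static bool DenseEnough(uint64_t NumCases, uint64_t TableSize) {
  if (NumCases > TableSize || TableSize >= UINT64_MAX / 10)
    return false;
  return NumCases * 10 >= TableSize * 4;
}

int main() {
  std::cout << DenseEnough(5, 12) << ' '    // 1: 50 >= 48, dense enough
            << DenseEnough(5, 128) << '\n'; // 0: 50 < 512, too sparse
}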
+
+/// SwitchToLookupTable - If the switch is only used to initialize one or more
+/// phi nodes in a common successor block with different constant values,
+/// replace the switch with lookup tables.
+static bool SwitchToLookupTable(SwitchInst *SI,
+ IRBuilder<> &Builder,
+ const DataLayout* TD,
+ const TargetTransformInfo *TTI) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+
+ // Only build lookup table when we have a target that supports it.
+ if (!TTI || !TTI->getScalarTargetTransformInfo() ||
+ !TTI->getScalarTargetTransformInfo()->shouldBuildLookupTables())
return false;
+ // FIXME: If the switch is too sparse for a lookup table, perhaps we could
+ // split off a dense part and build a lookup table for that.
+
+ // FIXME: This creates arrays of GEPs to constant strings, which means each
+ // GEP needs a runtime relocation in PIC code. We should just build one big
+ // string and lookup indices into that.
+
+ // Ignore the switch if the number of cases is too small.
+ // This is similar to the check when building jump tables in
+ // SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Determine the best cut-off.
+ if (SI->getNumCases() < 4)
+ return false;
+
+ // Figure out the corresponding result for each case value and phi node in the
+  // common destination, as well as the min and max case values.
+ assert(SI->case_begin() != SI->case_end());
+ SwitchInst::CaseIt CI = SI->case_begin();
+ ConstantInt *MinCaseVal = CI.getCaseValue();
+ ConstantInt *MaxCaseVal = CI.getCaseValue();
+
+ BasicBlock *CommonDest = 0;
+ typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy;
+ SmallDenseMap<PHINode*, ResultListTy> ResultLists;
+ SmallDenseMap<PHINode*, Constant*> DefaultResults;
+ SmallDenseMap<PHINode*, Type*> ResultTypes;
+ SmallVector<PHINode*, 4> PHIs;
+
+ for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
+ ConstantInt *CaseVal = CI.getCaseValue();
+ if (CaseVal->getValue().slt(MinCaseVal->getValue()))
+ MinCaseVal = CaseVal;
+ if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
+ MaxCaseVal = CaseVal;
+
+ // Resulting value at phi nodes for this case value.
+ typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
+ ResultsTy Results;
+ if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
+ Results))
+ return false;
+
+ // Append the result from this case to the list for each phi.
+ for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) {
+ if (!ResultLists.count(I->first))
+ PHIs.push_back(I->first);
+ ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second));
+ }
+ }
+
+ // Get the resulting values for the default case.
+ SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
+ if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
+ DefaultResultsList))
+ return false;
+ for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
+ PHINode *PHI = DefaultResultsList[I].first;
+ Constant *Result = DefaultResultsList[I].second;
+ DefaultResults[PHI] = Result;
+ ResultTypes[PHI] = Result->getType();
+ }
+
+ APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
+ uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
+ if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes))
+ return false;
+
+ // Create the BB that does the lookups.
+ Module &Mod = *CommonDest->getParent()->getParent();
+ BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(),
+ "switch.lookup",
+ CommonDest->getParent(),
+ CommonDest);
+
+ // Check whether the condition value is within the case range, and branch to
+ // the new BB.
+ Builder.SetInsertPoint(SI);
+ Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
+ "switch.tableidx");
+ Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
+ MinCaseVal->getType(), TableSize));
+ Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+
+ // Populate the BB that does the lookups.
+ Builder.SetInsertPoint(LookupBB);
+ bool ReturnedEarly = false;
+ for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
+ PHINode *PHI = PHIs[I];
+
+ SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI],
+ DefaultResults[PHI], TD);
+
+ Value *Result = Table.BuildLookup(TableIndex, Builder);
+
+ // If the result is used to return immediately from the function, we want to
+ // do that right here.
+ if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin()) &&
+ *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) {
+ Builder.CreateRet(Result);
+ ReturnedEarly = true;
+ break;
+ }
+
+ PHI->addIncoming(Result, LookupBB);
+ }
+
+ if (!ReturnedEarly)
+ Builder.CreateBr(CommonDest);
+
+ // Remove the switch.
+ for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) {
+ BasicBlock *Succ = SI->getSuccessor(i);
+ if (Succ == SI->getDefaultDest()) continue;
+ Succ->removePredecessor(SI->getParent());
+ }
+ SI->eraseFromParent();
+
+ ++NumLookupTables;
+ return true;
+}
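Sketched at the source level with a hypothetical function, the overall rewrite performed by SwitchToLookupTable is: a switch whose cases only choose constants for a common destination becomes a subtraction, a bounds check, and an array (or bitmap) lookup.

#include <cassert>

// Before: every case merely selects a constant result.
static int MonthLenSwitch(unsigned M) {
  switch (M) {
  case 0: return 31;
  case 1: return 28;
  case 2: return 31;
  case 3: return 30;
  default: return -1;
  }
}

// After, conceptually: switch.tableidx = cond - MinCaseVal, an icmp ult
// against the table size, then a load from a private constant table.
static int MonthLenTable(unsigned M) {
  static const int Table[4] = {31, 28, 31, 30};
  unsigned Idx = M - 0;                 // MinCaseVal is 0 here
  return Idx < 4 ? Table[Idx] : -1;
}

int main() {
  for (unsigned M = 0; M != 8; ++M)
    assert(MonthLenSwitch(M) == MonthLenTable(M));
}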
+
+bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
BasicBlock *BB = SI->getParent();
- // If we only have one predecessor, and if it is a branch on this value,
- // see if that predecessor totally determines the outcome of this switch.
- if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
- if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return SimplifyCFG(BB) | true;
+ if (isValueEqualityComparison(SI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
+ return SimplifyCFG(BB) | true;
- Value *Cond = SI->getCondition();
- if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
- if (SimplifySwitchOnSelect(SI, Select))
- return SimplifyCFG(BB) | true;
+ Value *Cond = SI->getCondition();
+ if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
+ if (SimplifySwitchOnSelect(SI, Select))
+ return SimplifyCFG(BB) | true;
- // If the block only contains the switch, see if we can fold the block
- // away into any preds.
- BasicBlock::iterator BBI = BB->begin();
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(BBI))
- ++BBI;
- if (SI == &*BBI)
- if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return SimplifyCFG(BB) | true;
+ // If the block only contains the switch, see if we can fold the block
+ // away into any preds.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (SI == &*BBI)
+ if (FoldValueComparisonIntoPredecessors(SI, Builder))
+ return SimplifyCFG(BB) | true;
+ }
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
@@ -2862,13 +3701,16 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (ForwardSwitchConditionToPHI(SI))
return SimplifyCFG(BB) | true;
+ if (SwitchToLookupTable(SI, Builder, TD, TTI))
+ return SimplifyCFG(BB) | true;
+
return false;
}
bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
BasicBlock *BB = IBI->getParent();
bool Changed = false;
-
+
// Eliminate redundant destinations.
SmallPtrSet<Value *, 8> Succs;
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
@@ -2879,7 +3721,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
--i; --e;
Changed = true;
}
- }
+ }
if (IBI->getNumDestinations() == 0) {
// If the indirectbr has no successors, change it to unreachable.
@@ -2887,14 +3729,14 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
EraseTerminatorInstAndDCECond(IBI);
return true;
}
-
+
if (IBI->getNumDestinations() == 1) {
// If the indirectbr has one successor, change it to a direct branch.
BranchInst::Create(IBI->getDestination(0), IBI);
EraseTerminatorInstAndDCECond(IBI);
return true;
}
-
+
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
return SimplifyCFG(BB) | true;
@@ -2904,13 +3746,16 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
BasicBlock *BB = BI->getParent();
-
+
+ if (SinkCommon && SinkThenElseCodeToEnd(BI))
+ return true;
+
// If the Terminator is the only non-phi instruction, simplify the block.
BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
-
+
// If the only instruction in the block is a seteq/setne comparison
// against a constant, try to simplify the block.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
@@ -2921,7 +3766,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder))
return true;
}
-
+
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
@@ -2934,7 +3779,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
BasicBlock *BB = BI->getParent();
-
+
// Conditional branch
if (isValueEqualityComparison(BI)) {
// If we only have one predecessor, and if it is a branch on this value,
@@ -2943,7 +3788,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
return SimplifyCFG(BB) | true;
-
+
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
BasicBlock::iterator I = BB->begin();
@@ -2962,17 +3807,17 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return SimplifyCFG(BB) | true;
}
}
-
+
// Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
if (SimplifyBranchOnICmpChain(BI, TD, Builder))
return true;
-
+
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
if (FoldBranchToCommonDest(BI))
return SimplifyCFG(BB) | true;
-
+
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
// there is any identical code in the "then" and "else" blocks. If so, we
@@ -2999,14 +3844,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
return SimplifyCFG(BB) | true;
}
-
+
// If this is a branch on a phi node in the current block, thread control
// through this block if any PHI node entries are constants.
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
if (FoldCondBranchOnPHI(BI, TD))
return SimplifyCFG(BB) | true;
-
+
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
@@ -3023,11 +3868,12 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
if (!C)
return false;
- if (!I->hasOneUse()) // Only look at single-use instructions, for compile time
+ if (I->use_empty())
return false;
if (C->isNullValue()) {
- Instruction *Use = I->use_back();
+ // Only look at the first use, avoid hurting compile time with long uselists
+ User *Use = *I->use_begin();
// Now make sure that there are no instructions in between that can alter
// control flow (eg. calls)
@@ -3114,7 +3960,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
//
if (MergeBlockIntoPredecessor(BB))
return true;
-
+
IRBuilder<> Builder(BB);
// If there is a trivial two-entry PHI node in this basic block, and we can
@@ -3152,6 +3998,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// eliminates unreachable basic blocks, and does other "peephole" optimization
/// of the CFG. It returns true if a modification was made.
///
-bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) {
- return SimplifyCFGOpt(TD).run(BB);
+bool llvm::SimplifyCFG(BasicBlock *BB, const DataLayout *TD,
+ const TargetTransformInfo *TTI) {
+ return SimplifyCFGOpt(TD, TTI).run(BB);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 5d673f1..110f3808 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -24,7 +24,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -44,7 +44,7 @@ namespace {
Loop *L;
LoopInfo *LI;
ScalarEvolution *SE;
- const TargetData *TD; // May be NULL
+ const DataLayout *TD; // May be NULL
SmallVectorImpl<WeakVH> &DeadInsts;
@@ -56,7 +56,7 @@ namespace {
L(Loop),
LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
SE(SE),
- TD(LPM->getAnalysisIfAvailable<TargetData>()),
+ TD(LPM->getAnalysisIfAvailable<DataLayout>()),
DeadInsts(Dead),
Changed(false) {
assert(LI && "IV simplification requires LoopInfo");
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
index 81eb9e0..65353dc 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -23,7 +23,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -46,7 +46,7 @@ namespace {
/// runOnFunction - Remove instructions that simplify.
bool runOnFunction(Function &F) {
const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
- const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -72,7 +72,7 @@ namespace {
++NumSimplified;
Changed = true;
}
- Changed |= RecursivelyDeleteTriviallyDeadInstructions(I);
+ Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
}
// Place the list of instructions to simplify on the next loop iteration
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
new file mode 100644
index 0000000..c3ea638
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -0,0 +1,1149 @@
+//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the library call simplifier: a collection of
+// LibCallOptimization subclasses that fold calls to well-known library
+// functions (string, memory, and fortified __*_chk routines) into simpler
+// and faster IR sequences.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/DataLayout.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+
+using namespace llvm;
+
+/// This class is the abstract base class for the set of optimizations that
+/// corresponds to one library call.
+namespace {
+class LibCallOptimization {
+protected:
+ Function *Caller;
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+ const LibCallSimplifier *LCS;
+ LLVMContext* Context;
+public:
+ LibCallOptimization() { }
+ virtual ~LibCallOptimization() {}
+
+  /// callOptimizer - This pure virtual method is implemented by derived classes to
+ /// do various optimizations. If this returns null then no transformation was
+ /// performed. If it returns CI, then it transformed the call and CI is to be
+ /// deleted. If it returns something else, replace CI with the new value and
+ /// delete CI.
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
+ =0;
+
+ Value *optimizeCall(CallInst *CI, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const LibCallSimplifier *LCS, IRBuilder<> &B) {
+ Caller = CI->getParent()->getParent();
+ this->TD = TD;
+ this->TLI = TLI;
+ this->LCS = LCS;
+ if (CI->getCalledFunction())
+ Context = &CI->getCalledFunction()->getContext();
+
+ // We never change the calling convention.
+ if (CI->getCallingConv() != llvm::CallingConv::C)
+ return NULL;
+
+ return callOptimizer(CI->getCalledFunction(), CI, B);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
+/// value is equal or not-equal to zero.
+static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality())
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+/// isOnlyUsedInEqualityComparison - Return true if it is only used in equality
+/// comparisons with With.
+static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality() && IC->getOperand(1) == With)
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Fortified Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct FortifiedLibCallOptimization : public LibCallOptimization {
+protected:
+ virtual bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp,
+ bool isString) const = 0;
+};
+
+struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization {
+ CallInst *CI;
+
+ bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+ if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
+ return true;
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
+ if (SizeCI->isAllOnesValue())
+ return true;
+ if (isString) {
+ uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
+ // If the length is 0 we don't know how long it is and so we can't
+ // remove the check.
+ if (Len == 0) return false;
+ return SizeCI->getZExtValue() >= Len;
+ }
+ if (ConstantInt *Arg = dyn_cast<ConstantInt>(
+ CI->getArgOperand(SizeArgOp)))
+ return SizeCI->getZExtValue() >= Arg->getZExtValue();
+ }
+ return false;
+ }
+};
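For the fortified __*_chk helpers below, isFoldable asks whether the object-size argument supplied by the front end is known to cover the access length, or is -1 meaning no check was requested; only then is the call lowered to the plain libc routine. A source-level sketch, assuming a glibc-style __memcpy_chk entry point:

#include <cstring>

// Hypothetical declaration of the glibc-style fortified routine.
extern "C" void *__memcpy_chk(void *Dst, const void *Src,
                              size_t Len, size_t ObjSize);

void Demo(char *Dst, const char *Src) {
  char Buf[32];
  // ObjSize (32) is a constant >= Len (16): foldable, becomes plain memcpy.
  __memcpy_chk(Buf, Src, 16, sizeof(Buf));
  // ObjSize of (size_t)-1 means "size unknown, no check": also foldable.
  __memcpy_chk(Dst, Src, 16, (size_t)-1);
}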
+
+struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return 0;
+ }
+};
+
+struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return 0;
+ }
+};
+
+struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
+ false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return 0;
+ }
+};
+
+struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ StringRef Name = Callee->getName();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ FT->getParamType(2) != TD->getIntPtrType(Context))
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // __strcpy_chk(x,x) -> x
+ return Src;
+
+ // If a) we don't have any length information, or b) we know this will
+ // fit then just lower to a plain strcpy. Otherwise we'll keep our
+ // strcpy_chk call which may fail at runtime if the size is too long.
+ // TODO: It might be nice to get a maximum length out of the possible
+    // string lengths for varying inputs.
+ if (isFoldable(2, 1, true)) {
+ Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6));
+ return Ret;
+ } else {
+      // Maybe we can still fold __strcpy_chk to __memcpy_chk.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+      // This optimization requires DataLayout.
+ if (!TD) return 0;
+
+ Value *Ret =
+ EmitMemCpyChk(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(Context), Len),
+ CI->getArgOperand(2), B, TD, TLI);
+ return Ret;
+ }
+ return 0;
+ }
+};
+
+struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ StringRef Name = Callee->getName();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)))
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
+ Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+ }
+
+ // If a) we don't have any length information, or b) we know this will
+ // fit then just lower to a plain stpcpy. Otherwise we'll keep our
+ // stpcpy_chk call which may fail at runtime if the size is too long.
+ // TODO: It might be nice to get a maximum length out of the possible
+    // string lengths for varying inputs.
+ if (isFoldable(2, 1, true)) {
+ Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6));
+ return Ret;
+ } else {
+      // Maybe we can still fold __stpcpy_chk to __memcpy_chk.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+      // This optimization requires DataLayout.
+ if (!TD) return 0;
+
+ Type *PT = FT->getParamType(0);
+ Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len);
+ Value *DstEnd = B.CreateGEP(Dst,
+ ConstantInt::get(TD->getIntPtrType(PT),
+ Len - 1));
+ if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD, TLI))
+ return 0;
+ return DstEnd;
+ }
+ return 0;
+ }
+};
+
+struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ this->CI = CI;
+ StringRef Name = Callee->getName();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getParent()->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return 0;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TD, TLI,
+ Name.substr(2, 7));
+ return Ret;
+ }
+ return 0;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// String and Memory Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct StrCatOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcat" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+ --Len; // Unbias length.
+
+ // Handle the simple, do-nothing case: strcat(x, "") -> x
+ if (Len == 0)
+ return Dst;
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ return emitStrLenMemCpy(Src, Dst, Len, B);
+ }
+
+ Value *emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
+ IRBuilder<> &B) {
+ // We need to find the end of the destination string. That's where the
+ // memory is to be moved to. We just generate a call to strlen.
+ Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
+ if (!DstLen)
+ return 0;
+
+ // Now that we have the destination's length, we must index into the
+ // destination's pointer to get the actual memcpy destination (end of
+ // the string .. we're concatenating).
+ Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+
+ // We have enough information to now generate the memcpy call to do the
+ // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
+ B.CreateMemCpy(CpyDst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+ return Dst;
+ }
+};
+
+struct StrNCatOpt : public StrCatOpt {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncat" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ uint64_t Len;
+
+ // We don't do anything if length is not constant
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Len = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0) return 0;
+ --SrcLen; // Unbias length.
+
+ // Handle the simple, do-nothing cases:
+ // strncat(x, "", c) -> x
+ // strncat(x, c, 0) -> x
+ if (SrcLen == 0 || Len == 0) return Dst;
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // We don't optimize this case
+ if (Len < SrcLen) return 0;
+
+ // strncat(x, s, c) -> strcat(x, s)
+ // s is constant so the strcat can be optimized further
+ return emitStrLenMemCpy(Src, Dst, SrcLen, B);
+ }
+};
+
+struct StrChrOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strchr" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+
+ // If the second operand is non-constant, see if we can compute the length
+ // of the input string and turn this into memchr.
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (CharC == 0) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ uint64_t Len = GetStringLength(SrcStr);
+ if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
+ return 0;
+
+ return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ B, TD, TLI);
+ }
+
+ // Otherwise, the character is a constant, see if the first argument is
+ // a string literal. If so, we can constant fold.
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str))
+ return 0;
+
+ // Compute the offset, make sure to handle the case when we're searching for
+ // zero (a weird way to spell strlen).
+ size_t I = CharC->getSExtValue() == 0 ?
+ Str.size() : Str.find(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
+
+ // strchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+ }
+};
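The constant-string path of StrChrOpt above reduces to a find on the literal: the result is a pointer offset into the string, "not found" becomes a null result, and searching for '\0' is just strlen. A standalone sketch of the C semantics being folded:

#include <cstring>
#include <iostream>

int main() {
  const char *S = "hello";

  std::cout << (std::strchr(S, 'l') - S) << '\n';        // 2: offset of first 'l'
  std::cout << (std::strchr(S, 'z') == nullptr) << '\n'; // 1: not found -> null
  std::cout << (std::strchr(S, 0) - S) << '\n';          // 5: '\0' -> strlen(S)
}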
+
+struct StrRChrOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strrchr" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+
+ // Cannot fold anything if we're not looking for a constant.
+ if (!CharC)
+ return 0;
+
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str)) {
+ // strrchr(s, 0) -> strchr(s, 0)
+ if (TD && CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, TD, TLI);
+ return 0;
+ }
+
+ // Compute the offset.
+ size_t I = CharC->getSExtValue() == 0 ?
+ Str.size() : Str.rfind(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. Return null.
+ return Constant::getNullValue(CI->getType());
+
+ // strrchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+ }
+};
+
+struct StrCmpOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcmp" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strcmp(x,x) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+ // strcmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(), Str1.compare(Str2));
+
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
+ return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+ CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ // strcmp(P, "x") -> memcmp(P, "x", 2)
+ uint64_t Len1 = GetStringLength(Str1P);
+ uint64_t Len2 = GetStringLength(Str2P);
+ if (Len1 && Len2) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ return EmitMemCmp(Str1P, Str2P,
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ std::min(Len1, Len2)), B, TD, TLI);
+ }
+
+ return 0;
+ }
+};
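+
+// Illustrative note (editorial sketch, not part of the original change): the
+// strcmp folds above cover, in C terms:
+//   strcmp(s, s)          ==>  0
+//   strcmp("abc", "abd")  ==>  -1   (StringRef::compare result)
+//   strcmp("", s)         ==>  -(int)(unsigned char)s[0]
+//   strcmp(s, "")         ==>   (int)(unsigned char)s[0]
+// and, when both lengths are known, a memcmp over min(len1, len2) bytes
+// (each length includes the terminating nul).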
+
+struct StrNCmpOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncmp" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strncmp(x,x,n) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ // Get the length argument if it is constant.
+ uint64_t Length;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Length = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Length == 0) // strncmp(x,y,0) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
+
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+    // strncmp(x, y, n) -> cnst  (if both x and y are constant strings)
+ if (HasStr1 && HasStr2) {
+ StringRef SubStr1 = Str1.substr(0, Length);
+ StringRef SubStr2 = Str2.substr(0, Length);
+ return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
+ }
+
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
+ return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+ CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ return 0;
+ }
+};
+
+struct StrCpyOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcpy" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // strcpy(x,x) -> x
+ return Src;
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+    // We have enough information to now generate the memcpy call to do the
+    // copy for us.  Make a memcpy that also copies the nul terminator, with
+    // align = 1.
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
+ return Dst;
+ }
+};
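+
+// Illustrative note (editorial sketch, not part of the original change): with
+// a constant source, StrCpyOpt rewrites, in C terms:
+//   strcpy(dst, "hi")  ==>  memcpy(dst, "hi", 3), result is dst
+// (3 bytes = the string plus its nul terminator, copied with align 1).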
+
+struct StpCpyOpt: public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "stpcpy" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
+ Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+ }
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+ Type *PT = FT->getParamType(0);
+ Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len);
+ Value *DstEnd = B.CreateGEP(Dst,
+ ConstantInt::get(TD->getIntPtrType(PT),
+ Len - 1));
+
+    // We have enough information to now generate the memcpy call to do the
+    // copy for us.  Make a memcpy that also copies the nul terminator, with
+    // align = 1.
+ B.CreateMemCpy(Dst, Src, LenV, 1);
+ return DstEnd;
+ }
+};
+
+struct StrNCpyOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ Value *LenOp = CI->getArgOperand(2);
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0) return 0;
+ --SrcLen;
+
+ if (SrcLen == 0) {
+ // strncpy(x, "", y) -> memset(x, '\0', y, 1)
+ B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
+ return Dst;
+ }
+
+ uint64_t Len;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
+ Len = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // Let strncpy handle the zero padding
+ if (Len > SrcLen+1) return 0;
+
+ Type *PT = FT->getParamType(0);
+ // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(PT), Len), 1);
+
+ return Dst;
+ }
+};
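+
+// Illustrative note (editorial sketch, not part of the original change): with
+// a constant source and bound, StrNCpyOpt rewrites, in C terms:
+//   strncpy(x, "", n)    ==>  memset(x, '\0', n)
+//   strncpy(x, "ab", 3)  ==>  memcpy(x, "ab", 3)   (bound covers src + nul)
+// Bounds larger than the source length plus one are left to strncpy so that
+// the zero padding stays correct.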
+
+struct StrLenOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ Value *Src = CI->getArgOperand(0);
+
+ // Constant folding: strlen("xyz") -> 3
+ if (uint64_t Len = GetStringLength(Src))
+ return ConstantInt::get(CI->getType(), Len-1);
+
+ // strlen(x) != 0 --> *x != 0
+ // strlen(x) == 0 --> *x == 0
+ if (isOnlyUsedInZeroEqualityComparison(CI))
+ return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+ return 0;
+ }
+};
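+
+// Illustrative note (editorial sketch, not part of the original change):
+// StrLenOpt either constant-folds,
+//   strlen("xyz")  ==>  3
+// or, when the result is only compared against zero, loads one byte:
+//   strlen(s) == 0  ==>  *s == 0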
+
+struct StrPBrkOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ FT->getReturnType() != FT->getParamType(0))
+ return 0;
+
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strpbrk(s, "") -> NULL
+ // strpbrk("", s) -> NULL
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t I = S1.find_first_of(S2);
+ if (I == std::string::npos) // No match.
+ return Constant::getNullValue(CI->getType());
+
+ return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+ }
+
+ // strpbrk(s, "a") -> strchr(s, 'a')
+ if (TD && HasS2 && S2.size() == 1)
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI);
+
+ return 0;
+ }
+};
+
+struct StrToOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy())
+ return 0;
+
+ Value *EndPtr = CI->getArgOperand(1);
+ if (isa<ConstantPointerNull>(EndPtr)) {
+ // With a null EndPtr, this function won't capture the main argument.
+ // It would be readonly too, except that it still may write to errno.
+ CI->addAttribute(1, Attributes::get(Callee->getContext(),
+ Attributes::NoCapture));
+ }
+
+ return 0;
+ }
+};
+
+struct StrSpnOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strspn(s, "") -> 0
+ // strspn("", s) -> 0
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t Pos = S1.find_first_not_of(S2);
+ if (Pos == StringRef::npos) Pos = S1.size();
+ return ConstantInt::get(CI->getType(), Pos);
+ }
+
+ return 0;
+ }
+};
+
+struct StrCSpnOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strcspn("", s) -> 0
+ if (HasS1 && S1.empty())
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t Pos = S1.find_first_of(S2);
+ if (Pos == StringRef::npos) Pos = S1.size();
+ return ConstantInt::get(CI->getType(), Pos);
+ }
+
+ // strcspn(s, "") -> strlen(s)
+ if (TD && HasS2 && S2.empty())
+ return EmitStrLen(CI->getArgOperand(0), B, TD, TLI);
+
+ return 0;
+ }
+};
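+
+// Illustrative note (editorial sketch, not part of the original change): the
+// strpbrk/strspn/strcspn folds above cover, in C terms:
+//   strpbrk("abcde", "dx")  ==>  "abcde" + 3
+//   strspn("aab", "a")      ==>  2
+//   strcspn("abc", "c")     ==>  2
+//   strcspn(s, "")          ==>  strlen(s)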
+
+struct StrStrOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isPointerTy())
+ return 0;
+
+ // fold strstr(x, x) -> x.
+ if (CI->getArgOperand(0) == CI->getArgOperand(1))
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
+ if (TD && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI);
+ if (!StrLen)
+ return 0;
+ Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
+ StrLen, B, TD, TLI);
+ if (!StrNCmp)
+ return 0;
+ for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
+ UI != UE; ) {
+ ICmpInst *Old = cast<ICmpInst>(*UI++);
+ Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
+ ConstantInt::getNullValue(StrNCmp->getType()),
+ "cmp");
+ LCS->replaceAllUsesWith(Old, Cmp);
+ }
+ return CI;
+ }
+
+ // See if either input string is a constant string.
+ StringRef SearchStr, ToFindStr;
+ bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
+ bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
+
+ // fold strstr(x, "") -> x.
+ if (HasStr2 && ToFindStr.empty())
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // If both strings are known, constant fold it.
+ if (HasStr1 && HasStr2) {
+ std::string::size_type Offset = SearchStr.find(ToFindStr);
+
+ if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
+ return Constant::getNullValue(CI->getType());
+
+ // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
+ Value *Result = CastToCStr(CI->getArgOperand(0), B);
+ Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
+ return B.CreateBitCast(Result, CI->getType());
+ }
+
+ // fold strstr(x, "y") -> strchr(x, 'y').
+ if (HasStr2 && ToFindStr.size() == 1) {
+ Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI);
+ return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0;
+ }
+ return 0;
+ }
+};
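+
+// Illustrative note (editorial sketch, not part of the original change): the
+// strstr folds above cover, in C terms:
+//   strstr(s, s)          ==>  s
+//   strstr(s, "")         ==>  s
+//   strstr("abcd", "bc")  ==>  "abcd" + 1
+//   strstr(s, "y")        ==>  strchr(s, 'y')
+// and strstr(a, b) == a is rewritten to strncmp(a, b, strlen(b)) == 0.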
+
+struct MemCmpOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy(32))
+ return 0;
+
+ Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
+
+ if (LHS == RHS) // memcmp(s,s,x) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // Make sure we have a constant length.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!LenC) return 0;
+ uint64_t Len = LenC->getZExtValue();
+
+ if (Len == 0) // memcmp(s1,s2,0) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
+ if (Len == 1) {
+ Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"),
+ CI->getType(), "lhsv");
+ Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"),
+ CI->getType(), "rhsv");
+ return B.CreateSub(LHSV, RHSV, "chardiff");
+ }
+
+ // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
+ StringRef LHSStr, RHSStr;
+ if (getConstantStringInfo(LHS, LHSStr) &&
+ getConstantStringInfo(RHS, RHSStr)) {
+ // Make sure we're not reading out-of-bounds memory.
+ if (Len > LHSStr.size() || Len > RHSStr.size())
+ return 0;
+ uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ return ConstantInt::get(CI->getType(), Ret);
+ }
+
+ return 0;
+ }
+};
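+
+// Illustrative note (editorial sketch, not part of the original change): the
+// memcmp folds above cover, in C terms:
+//   memcmp(s, s, n)          ==>  0
+//   memcmp(a, b, 0)          ==>  0
+//   memcmp(a, b, 1)          ==>  (int)*(unsigned char*)a - (int)*(unsigned char*)b
+//   memcmp("foo", "for", 3)  ==>  folded to a constant via host memcmp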
+
+struct MemCpyOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+};
+
+struct MemMoveOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+};
+
+struct MemSetOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memset(p, v, n) -> llvm.memset(p, v, n, 1)
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+};
+
+} // End anonymous namespace.
+
+namespace llvm {
+
+class LibCallSimplifierImpl {
+ const DataLayout *TD;
+ const TargetLibraryInfo *TLI;
+ const LibCallSimplifier *LCS;
+ StringMap<LibCallOptimization*> Optimizations;
+
+ // Fortified library call optimizations.
+ MemCpyChkOpt MemCpyChk;
+ MemMoveChkOpt MemMoveChk;
+ MemSetChkOpt MemSetChk;
+ StrCpyChkOpt StrCpyChk;
+ StpCpyChkOpt StpCpyChk;
+ StrNCpyChkOpt StrNCpyChk;
+
+ // String library call optimizations.
+ StrCatOpt StrCat;
+ StrNCatOpt StrNCat;
+ StrChrOpt StrChr;
+ StrRChrOpt StrRChr;
+ StrCmpOpt StrCmp;
+ StrNCmpOpt StrNCmp;
+ StrCpyOpt StrCpy;
+ StpCpyOpt StpCpy;
+ StrNCpyOpt StrNCpy;
+ StrLenOpt StrLen;
+ StrPBrkOpt StrPBrk;
+ StrToOpt StrTo;
+ StrSpnOpt StrSpn;
+ StrCSpnOpt StrCSpn;
+ StrStrOpt StrStr;
+
+ // Memory library call optimizations.
+ MemCmpOpt MemCmp;
+ MemCpyOpt MemCpy;
+ MemMoveOpt MemMove;
+ MemSetOpt MemSet;
+
+ void initOptimizations();
+ void addOpt(LibFunc::Func F, LibCallOptimization* Opt);
+public:
+ LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI,
+ const LibCallSimplifier *LCS) {
+ this->TD = TD;
+ this->TLI = TLI;
+ this->LCS = LCS;
+ }
+
+ Value *optimizeCall(CallInst *CI);
+};
+
+void LibCallSimplifierImpl::initOptimizations() {
+ // Fortified library call optimizations.
+ Optimizations["__memcpy_chk"] = &MemCpyChk;
+ Optimizations["__memmove_chk"] = &MemMoveChk;
+ Optimizations["__memset_chk"] = &MemSetChk;
+ Optimizations["__strcpy_chk"] = &StrCpyChk;
+ Optimizations["__stpcpy_chk"] = &StpCpyChk;
+ Optimizations["__strncpy_chk"] = &StrNCpyChk;
+ Optimizations["__stpncpy_chk"] = &StrNCpyChk;
+
+ // String library call optimizations.
+ addOpt(LibFunc::strcat, &StrCat);
+ addOpt(LibFunc::strncat, &StrNCat);
+ addOpt(LibFunc::strchr, &StrChr);
+ addOpt(LibFunc::strrchr, &StrRChr);
+ addOpt(LibFunc::strcmp, &StrCmp);
+ addOpt(LibFunc::strncmp, &StrNCmp);
+ addOpt(LibFunc::strcpy, &StrCpy);
+ addOpt(LibFunc::stpcpy, &StpCpy);
+ addOpt(LibFunc::strncpy, &StrNCpy);
+ addOpt(LibFunc::strlen, &StrLen);
+ addOpt(LibFunc::strpbrk, &StrPBrk);
+ addOpt(LibFunc::strtol, &StrTo);
+ addOpt(LibFunc::strtod, &StrTo);
+ addOpt(LibFunc::strtof, &StrTo);
+ addOpt(LibFunc::strtoul, &StrTo);
+ addOpt(LibFunc::strtoll, &StrTo);
+ addOpt(LibFunc::strtold, &StrTo);
+ addOpt(LibFunc::strtoull, &StrTo);
+ addOpt(LibFunc::strspn, &StrSpn);
+ addOpt(LibFunc::strcspn, &StrCSpn);
+ addOpt(LibFunc::strstr, &StrStr);
+
+ // Memory library call optimizations.
+ addOpt(LibFunc::memcmp, &MemCmp);
+ addOpt(LibFunc::memcpy, &MemCpy);
+ addOpt(LibFunc::memmove, &MemMove);
+ addOpt(LibFunc::memset, &MemSet);
+}
+
+Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
+ if (Optimizations.empty())
+ initOptimizations();
+
+ Function *Callee = CI->getCalledFunction();
+ LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+ if (LCO) {
+ IRBuilder<> Builder(CI);
+ return LCO->optimizeCall(CI, TD, TLI, LCS, Builder);
+ }
+ return 0;
+}
+
+void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) {
+ if (TLI->has(F))
+ Optimizations[TLI->getName(F)] = Opt;
+}
+
+LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ Impl = new LibCallSimplifierImpl(TD, TLI, this);
+}
+
+LibCallSimplifier::~LibCallSimplifier() {
+ delete Impl;
+}
+
+Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
+ return Impl->optimizeCall(CI);
+}
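+
+// Illustrative usage sketch (editorial, not part of this change; the calling
+// pass shown here is hypothetical):
+//   LibCallSimplifier Simplifier(TD, TLI);
+//   if (Value *Simplified = Simplifier.optimizeCall(CI))
+//     ... replace uses of CI with Simplified when a new value is returned ...
+// optimizeCall() returns the replacement value, or null when no
+// simplification applies.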
+
+void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
+ I->replaceAllUsesWith(With);
+ I->eraseFromParent();
+}
+
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
index 24e8c8f..5812d46 100644
--- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -29,6 +29,7 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializePromotePassPass(Registry);
initializeUnifyFunctionExitNodesPass(Registry);
initializeInstSimplifierPass(Registry);
+ initializeMetaRenamerPass(Registry);
}
/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses.
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index fc2538d..a30b093 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -21,7 +21,7 @@
using namespace llvm;
// Out of line method to get vtable etc for class.
-void ValueMapTypeRemapper::Anchor() {}
+void ValueMapTypeRemapper::anchor() {}
Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper) {
diff --git a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index 62d23cb..f7be3e3 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -28,12 +28,14 @@
#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -41,17 +43,27 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ValueHandle.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
+#include "llvm/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <map>
using namespace llvm;
+static cl::opt<bool>
+IgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false),
+ cl::Hidden, cl::desc("Ignore target information"));
+
static cl::opt<unsigned>
ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden,
cl::desc("The required chain depth for vectorization"));
+static cl::opt<bool>
+UseChainDepthWithTI("bb-vectorize-use-chain-depth", cl::init(false),
+ cl::Hidden, cl::desc("Use the chain depth requirement with"
+ " target information"));
+
static cl::opt<unsigned>
SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden,
cl::desc("The maximum search distance for instruction pairs"));
@@ -93,8 +105,9 @@ static cl::opt<bool>
NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden,
cl::desc("Don't try to vectorize floating-point values"));
+// FIXME: This should default to false once pointer vector support works.
static cl::opt<bool>
-NoPointers("bb-vectorize-no-pointers", cl::init(false), cl::Hidden,
+NoPointers("bb-vectorize-no-pointers", cl::init(/*false*/ true), cl::Hidden,
cl::desc("Don't try to vectorize pointer values"));
static cl::opt<bool>
@@ -159,6 +172,12 @@ DebugCycleCheck("bb-vectorize-debug-cycle-check",
cl::init(false), cl::Hidden,
cl::desc("When debugging is enabled, output information on the"
" cycle-checking process"));
+
+static cl::opt<bool>
+PrintAfterEveryPair("bb-vectorize-debug-print-after-every-pair",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, dump the basic block after"
+ " every pair is fused"));
#endif
STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize");
@@ -177,13 +196,19 @@ namespace {
BBVectorize(Pass *P, const VectorizeConfig &C)
: BasicBlockPass(ID), Config(C) {
AA = &P->getAnalysis<AliasAnalysis>();
+ DT = &P->getAnalysis<DominatorTree>();
SE = &P->getAnalysis<ScalarEvolution>();
- TD = P->getAnalysisIfAvailable<TargetData>();
+ TD = P->getAnalysisIfAvailable<DataLayout>();
+ TTI = IgnoreTargetInfo ? 0 :
+ P->getAnalysisIfAvailable<TargetTransformInfo>();
+ VTTI = TTI ? TTI->getVectorTargetTransformInfo() : 0;
}
typedef std::pair<Value *, Value *> ValuePair;
+ typedef std::pair<ValuePair, int> ValuePairWithCost;
typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair
+ typedef std::pair<VPPair, unsigned> VPPairWithType;
typedef std::pair<std::multimap<Value *, Value *>::iterator,
std::multimap<Value *, Value *>::iterator> VPIteratorPair;
typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator,
@@ -191,8 +216,11 @@ namespace {
VPPIteratorPair;
AliasAnalysis *AA;
+ DominatorTree *DT;
ScalarEvolution *SE;
- TargetData *TD;
+ DataLayout *TD;
+ TargetTransformInfo *TTI;
+ const VectorTargetTransformInfo *VTTI;
// FIXME: const correct?
@@ -201,11 +229,23 @@ namespace {
bool getCandidatePairs(BasicBlock &BB,
BasicBlock::iterator &Start,
std::multimap<Value *, Value *> &CandidatePairs,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, bool NonPow2Len);
+ // FIXME: The current implementation does not account for pairs that
+ // are connected in multiple ways. For example:
+ // C1 = A1 / A2; C2 = A2 / A1 (which may be both direct and a swap)
+ enum PairConnectionType {
+ PairConnectionDirect,
+ PairConnectionSwap,
+ PairConnectionSplat
+ };
+
void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs,
std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs);
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes);
void buildDepMap(BasicBlock &BB,
std::multimap<Value *, Value *> &CandidatePairs,
@@ -213,19 +253,29 @@ namespace {
DenseSet<ValuePair> &PairableInstUsers);
void choosePairs(std::multimap<Value *, Value *> &CandidatePairs,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
DenseSet<ValuePair> &PairableInstUsers,
DenseMap<Value *, Value *>& ChosenPairs);
void fuseChosenPairs(BasicBlock &BB,
std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *>& ChosenPairs);
+ DenseMap<Value *, Value *>& ChosenPairs,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairDeps);
+
bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore);
bool areInstsCompatible(Instruction *I, Instruction *J,
- bool IsSimpleLoadStore, bool NonPow2Len);
+ bool IsSimpleLoadStore, bool NonPow2Len,
+ int &CostSavings, int &FixedOrder);
bool trackUsesOfI(DenseSet<Value *> &Users,
AliasSetTracker &WriteSet, Instruction *I,
@@ -236,6 +286,7 @@ namespace {
std::multimap<Value *, Value *> &CandidatePairs,
std::vector<Value *> &PairableInsts,
std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
ValuePair P);
bool pairsConflict(ValuePair P, ValuePair Q,
@@ -267,17 +318,21 @@ namespace {
void findBestTreeFor(
std::multimap<Value *, Value *> &CandidatePairs,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
DenseSet<ValuePair> &PairableInstUsers,
std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
DenseMap<Value *, Value *> &ChosenPairs,
DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
- size_t &BestEffSize, VPIteratorPair ChoiceRange,
+ int &BestEffSize, VPIteratorPair ChoiceRange,
bool UseCycleCheck);
Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
- Instruction *J, unsigned o, bool FlipMemInputs);
+ Instruction *J, unsigned o);
void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
unsigned MaskOffset, unsigned NumInElem,
@@ -289,20 +344,20 @@ namespace {
bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J,
unsigned o, Value *&LOp, unsigned numElemL,
- Type *ArgTypeL, Type *ArgTypeR,
+ Type *ArgTypeL, Type *ArgTypeR, bool IBeforeJ,
unsigned IdxOff = 0);
Value *getReplacementInput(LLVMContext& Context, Instruction *I,
- Instruction *J, unsigned o, bool FlipMemInputs);
+ Instruction *J, unsigned o, bool IBeforeJ);
void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
Instruction *J, SmallVector<Value *, 3> &ReplacedOperands,
- bool FlipMemInputs);
+ bool IBeforeJ);
void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
Instruction *J, Instruction *K,
Instruction *&InsertionPt, Instruction *&K1,
- Instruction *&K2, bool FlipMemInputs);
+ Instruction *&K2);
void collectPairLoadMoveSet(BasicBlock &BB,
DenseMap<Value *, Value *> &ChosenPairs,
@@ -314,10 +369,6 @@ namespace {
DenseMap<Value *, Value *> &ChosenPairs,
std::multimap<Value *, Value *> &LoadMoveSet);
- void collectPtrInfo(std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseSet<Value *> &LowPtrInsts);
-
bool canMoveUsesOfIAfterJ(BasicBlock &BB,
std::multimap<Value *, Value *> &LoadMoveSet,
Instruction *I, Instruction *J);
@@ -330,13 +381,22 @@ namespace {
void combineMetadata(Instruction *K, const Instruction *J);
bool vectorizeBB(BasicBlock &BB) {
+ if (!DT->isReachableFromEntry(&BB)) {
+ DEBUG(dbgs() << "BBV: skipping unreachable " << BB.getName() <<
+ " in " << BB.getParent()->getName() << "\n");
+ return false;
+ }
+
+ DEBUG(if (VTTI) dbgs() << "BBV: using target information\n");
+
bool changed = false;
// Iterate a sufficient number of times to merge types of size 1 bit,
// then 2 bits, then 4, etc. up to half of the target vector width of the
// target vector register.
unsigned n = 1;
for (unsigned v = 2;
- v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter);
+ (VTTI || v <= Config.VectorBits) &&
+ (!Config.MaxIter || n <= Config.MaxIter);
v *= 2, ++n) {
DEBUG(dbgs() << "BBV: fusing loop #" << n <<
" for " << BB.getName() << " in " <<
@@ -363,8 +423,12 @@ namespace {
virtual bool runOnBasicBlock(BasicBlock &BB) {
AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
- TD = getAnalysisIfAvailable<TargetData>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TTI = IgnoreTargetInfo ? 0 :
+ getAnalysisIfAvailable<TargetTransformInfo>();
+ VTTI = TTI ? TTI->getVectorTargetTransformInfo() : 0;
return vectorizeBB(BB);
}
@@ -372,8 +436,10 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
BasicBlockPass::getAnalysisUsage(AU);
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<DominatorTree>();
AU.addPreserved<ScalarEvolution>();
AU.setPreservesCFG();
}
@@ -415,6 +481,14 @@ namespace {
T2 = cast<CastInst>(I)->getSrcTy();
else
T2 = T1;
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ T2 = SI->getCondition()->getType();
+ } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) {
+ T2 = SI->getOperand(0)->getType();
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
+ T2 = CI->getOperand(0)->getType();
+ }
}
// Returns the weight associated with the provided value. A chain of
@@ -446,6 +520,62 @@ namespace {
return 1;
}
+ // Returns the cost of the provided instruction using VTTI.
+ // This does not handle loads and stores.
+ unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2) {
+ switch (Opcode) {
+ default: break;
+ case Instruction::GetElementPtr:
+ // We mark this instruction as zero-cost because scalar GEPs are usually
+      // lowered to the instruction addressing mode. At the moment we don't
+ // generate vector GEPs.
+ return 0;
+ case Instruction::Br:
+ return VTTI->getCFInstrCost(Opcode);
+ case Instruction::PHI:
+ return 0;
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return VTTI->getArithmeticInstrCost(Opcode, T1);
+ case Instruction::Select:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return VTTI->getCmpSelInstrCost(Opcode, T1, T2);
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast:
+ case Instruction::ShuffleVector:
+ return VTTI->getCastInstrCost(Opcode, T1, T2);
+ }
+
+ return 1;
+ }
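+
+    // Illustrative note (editorial sketch, not part of the original change):
+    // for a candidate pair of two i32 adds, the vector cost
+    //   VCost = getInstrCost(Instruction::Add, <2 x i32>, <2 x i32>)
+    // is compared against ICost + JCost of the scalar adds in
+    // areInstsCompatible below; the pair is rejected when VCost is larger.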
+
// This determines the relative offset of two loads or stores, returning
// true if the offset could be determined to be some constant value.
// For example, if OffsetInElmts == 1, then J accesses the memory directly
@@ -453,20 +583,30 @@ namespace {
// directly after J.
bool getPairPtrInfo(Instruction *I, Instruction *J,
Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment,
- int64_t &OffsetInElmts) {
+ unsigned &IAddressSpace, unsigned &JAddressSpace,
+ int64_t &OffsetInElmts, bool ComputeOffset = true) {
OffsetInElmts = 0;
- if (isa<LoadInst>(I)) {
- IPtr = cast<LoadInst>(I)->getPointerOperand();
- JPtr = cast<LoadInst>(J)->getPointerOperand();
- IAlignment = cast<LoadInst>(I)->getAlignment();
- JAlignment = cast<LoadInst>(J)->getAlignment();
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ LoadInst *LJ = cast<LoadInst>(J);
+ IPtr = LI->getPointerOperand();
+ JPtr = LJ->getPointerOperand();
+ IAlignment = LI->getAlignment();
+ JAlignment = LJ->getAlignment();
+ IAddressSpace = LI->getPointerAddressSpace();
+ JAddressSpace = LJ->getPointerAddressSpace();
} else {
- IPtr = cast<StoreInst>(I)->getPointerOperand();
- JPtr = cast<StoreInst>(J)->getPointerOperand();
- IAlignment = cast<StoreInst>(I)->getAlignment();
- JAlignment = cast<StoreInst>(J)->getAlignment();
+ StoreInst *SI = cast<StoreInst>(I), *SJ = cast<StoreInst>(J);
+ IPtr = SI->getPointerOperand();
+ JPtr = SJ->getPointerOperand();
+ IAlignment = SI->getAlignment();
+ JAlignment = SJ->getAlignment();
+ IAddressSpace = SI->getPointerAddressSpace();
+ JAddressSpace = SJ->getPointerAddressSpace();
}
+ if (!ComputeOffset)
+ return true;
+
const SCEV *IPtrSCEV = SE->getSCEV(IPtr);
const SCEV *JPtrSCEV = SE->getSCEV(JPtr);
@@ -536,6 +676,19 @@ namespace {
return false;
}
+
+ bool isPureIEChain(InsertElementInst *IE) {
+ InsertElementInst *IENext = IE;
+ do {
+ if (!isa<UndefValue>(IENext->getOperand(0)) &&
+ !isa<InsertElementInst>(IENext->getOperand(0))) {
+ return false;
+ }
+ } while ((IENext =
+ dyn_cast<InsertElementInst>(IENext->getOperand(0))));
+
+ return true;
+ }
};
// This function implements one vectorization iteration on the provided
@@ -546,11 +699,18 @@ namespace {
std::vector<Value *> AllPairableInsts;
DenseMap<Value *, Value *> AllChosenPairs;
+ DenseSet<ValuePair> AllFixedOrderPairs;
+ DenseMap<VPPair, unsigned> AllPairConnectionTypes;
+ std::multimap<ValuePair, ValuePair> AllConnectedPairs, AllConnectedPairDeps;
do {
std::vector<Value *> PairableInsts;
std::multimap<Value *, Value *> CandidatePairs;
+ DenseSet<ValuePair> FixedOrderPairs;
+ DenseMap<ValuePair, int> CandidatePairCostSavings;
ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
+ FixedOrderPairs,
+ CandidatePairCostSavings,
PairableInsts, NonPow2Len);
if (PairableInsts.empty()) continue;
@@ -563,10 +723,18 @@ namespace {
// Note that it only matters that both members of the second pair use some
// element of the first pair (to allow for splatting).
- std::multimap<ValuePair, ValuePair> ConnectedPairs;
- computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs);
+ std::multimap<ValuePair, ValuePair> ConnectedPairs, ConnectedPairDeps;
+ DenseMap<VPPair, unsigned> PairConnectionTypes;
+ computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs,
+ PairConnectionTypes);
if (ConnectedPairs.empty()) continue;
+ for (std::multimap<ValuePair, ValuePair>::iterator
+ I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
+ I != IE; ++I) {
+ ConnectedPairDeps.insert(VPPair(I->second, I->first));
+ }
+
// Build the pairable-instruction dependency map
DenseSet<ValuePair> PairableInstUsers;
buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers);
@@ -578,13 +746,48 @@ namespace {
// variables.
DenseMap<Value *, Value *> ChosenPairs;
- choosePairs(CandidatePairs, PairableInsts, ConnectedPairs,
+ choosePairs(CandidatePairs, CandidatePairCostSavings,
+ PairableInsts, FixedOrderPairs, PairConnectionTypes,
+ ConnectedPairs, ConnectedPairDeps,
PairableInstUsers, ChosenPairs);
if (ChosenPairs.empty()) continue;
AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(),
PairableInsts.end());
AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end());
+
+ // Only for the chosen pairs, propagate information on fixed-order pairs,
+ // pair connections, and their types to the data structures used by the
+ // pair fusion procedures.
+ for (DenseMap<Value *, Value *>::iterator I = ChosenPairs.begin(),
+ IE = ChosenPairs.end(); I != IE; ++I) {
+ if (FixedOrderPairs.count(*I))
+ AllFixedOrderPairs.insert(*I);
+ else if (FixedOrderPairs.count(ValuePair(I->second, I->first)))
+ AllFixedOrderPairs.insert(ValuePair(I->second, I->first));
+
+ for (DenseMap<Value *, Value *>::iterator J = ChosenPairs.begin();
+ J != IE; ++J) {
+ DenseMap<VPPair, unsigned>::iterator K =
+ PairConnectionTypes.find(VPPair(*I, *J));
+ if (K != PairConnectionTypes.end()) {
+ AllPairConnectionTypes.insert(*K);
+ } else {
+ K = PairConnectionTypes.find(VPPair(*J, *I));
+ if (K != PairConnectionTypes.end())
+ AllPairConnectionTypes.insert(*K);
+ }
+ }
+ }
+
+ for (std::multimap<ValuePair, ValuePair>::iterator
+ I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
+ I != IE; ++I) {
+ if (AllPairConnectionTypes.count(*I)) {
+ AllConnectedPairs.insert(*I);
+ AllConnectedPairDeps.insert(VPPair(I->second, I->first));
+ }
+ }
} while (ShouldContinue);
if (AllChosenPairs.empty()) return false;
@@ -597,11 +800,13 @@ namespace {
// replaced with a vector_extract on the result. Subsequent optimization
// passes should coalesce the build/extract combinations.
- fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs);
+ fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs,
+ AllPairConnectionTypes,
+ AllConnectedPairs, AllConnectedPairDeps);
// It is important to cleanup here so that future iterations of this
// function have less work to do.
- (void) SimplifyInstructionsInBlock(&BB, TD);
+ (void) SimplifyInstructionsInBlock(&BB, TD, AA->getTargetLibraryInfo());
return true;
}
@@ -667,15 +872,22 @@ namespace {
!(VectorType::isValidElementType(T2) || T2->isVectorTy()))
return false;
- if (T1->getScalarSizeInBits() == 1 && T2->getScalarSizeInBits() == 1) {
+ if (T1->getScalarSizeInBits() == 1) {
if (!Config.VectorizeBools)
return false;
} else {
- if (!Config.VectorizeInts
- && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
+ if (!Config.VectorizeInts && T1->isIntOrIntVectorTy())
return false;
}
-
+
+ if (T2->getScalarSizeInBits() == 1) {
+ if (!Config.VectorizeBools)
+ return false;
+ } else {
+ if (!Config.VectorizeInts && T2->isIntOrIntVectorTy())
+ return false;
+ }
+
if (!Config.VectorizeFloats
&& (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
return false;
@@ -691,8 +903,8 @@ namespace {
T2->getScalarType()->isPointerTy()))
return false;
- if (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
- T2->getPrimitiveSizeInBits() >= Config.VectorBits)
+ if (!VTTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
+ T2->getPrimitiveSizeInBits() >= Config.VectorBits))
return false;
return true;
@@ -703,10 +915,14 @@ namespace {
// that I has already been determined to be vectorizable and that J is not
// in the use tree of I.
bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J,
- bool IsSimpleLoadStore, bool NonPow2Len) {
+ bool IsSimpleLoadStore, bool NonPow2Len,
+ int &CostSavings, int &FixedOrder) {
DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I <<
" <-> " << *J << "\n");
+ CostSavings = 0;
+ FixedOrder = 0;
+
// Loads and stores can be merged if they have different alignments,
// but are otherwise the same.
if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment |
@@ -719,38 +935,84 @@ namespace {
unsigned MaxTypeBits = std::max(
IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(),
IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits());
- if (MaxTypeBits > Config.VectorBits)
+ if (!VTTI && MaxTypeBits > Config.VectorBits)
return false;
// FIXME: handle addsub-type operations!
if (IsSimpleLoadStore) {
Value *IPtr, *JPtr;
- unsigned IAlignment, JAlignment;
+ unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
int64_t OffsetInElmts = 0;
if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
+ IAddressSpace, JAddressSpace,
OffsetInElmts) && abs64(OffsetInElmts) == 1) {
- if (Config.AlignedOnly) {
- Type *aTypeI = isa<StoreInst>(I) ?
- cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
- Type *aTypeJ = isa<StoreInst>(J) ?
- cast<StoreInst>(J)->getValueOperand()->getType() : J->getType();
+ FixedOrder = (int) OffsetInElmts;
+ unsigned BottomAlignment = IAlignment;
+ if (OffsetInElmts < 0) BottomAlignment = JAlignment;
+
+ Type *aTypeI = isa<StoreInst>(I) ?
+ cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
+ Type *aTypeJ = isa<StoreInst>(J) ?
+ cast<StoreInst>(J)->getValueOperand()->getType() : J->getType();
+ Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
+ if (Config.AlignedOnly) {
// An aligned load or store is possible only if the instruction
// with the lower offset has an alignment suitable for the
// vector type.
- unsigned BottomAlignment = IAlignment;
- if (OffsetInElmts < 0) BottomAlignment = JAlignment;
-
- Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
unsigned VecAlignment = TD->getPrefTypeAlignment(VType);
if (BottomAlignment < VecAlignment)
return false;
}
+
+ if (VTTI) {
+ unsigned ICost = VTTI->getMemoryOpCost(I->getOpcode(), I->getType(),
+ IAlignment, IAddressSpace);
+ unsigned JCost = VTTI->getMemoryOpCost(J->getOpcode(), J->getType(),
+ JAlignment, JAddressSpace);
+ unsigned VCost = VTTI->getMemoryOpCost(I->getOpcode(), VType,
+ BottomAlignment,
+ IAddressSpace);
+ if (VCost > ICost + JCost)
+ return false;
+
+ // We don't want to fuse to a type that will be split, even
+ // if the two input types will also be split and there is no other
+ // associated cost.
+ unsigned VParts = VTTI->getNumberOfParts(VType);
+ if (VParts > 1)
+ return false;
+ else if (!VParts && VCost == ICost + JCost)
+ return false;
+
+ CostSavings = ICost + JCost - VCost;
+ }
} else {
return false;
}
+ } else if (VTTI) {
+ unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2);
+ unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2);
+ Type *VT1 = getVecTypeForPair(IT1, JT1),
+ *VT2 = getVecTypeForPair(IT2, JT2);
+ unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2);
+
+ if (VCost > ICost + JCost)
+ return false;
+
+ // We don't want to fuse to a type that will be split, even
+ // if the two input types will also be split and there is no other
+ // associated cost.
+ unsigned VParts1 = VTTI->getNumberOfParts(VT1),
+ VParts2 = VTTI->getNumberOfParts(VT2);
+ if (VParts1 > 1 || VParts2 > 1)
+ return false;
+ else if ((!VParts1 || !VParts2) && VCost == ICost + JCost)
+ return false;
+
+ CostSavings = ICost + JCost - VCost;
}
// The powi intrinsic is special because only the first argument is
@@ -833,6 +1095,8 @@ namespace {
bool BBVectorize::getCandidatePairs(BasicBlock &BB,
BasicBlock::iterator &Start,
std::multimap<Value *, Value *> &CandidatePairs,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, bool NonPow2Len) {
BasicBlock::iterator E = BB.end();
if (Start == E) return false;
@@ -869,7 +1133,9 @@ namespace {
// J does not use I, and comes before the first use of I, so it can be
// merged with I if the instructions are compatible.
- if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len)) continue;
+ int CostSavings, FixedOrder;
+ if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len,
+ CostSavings, FixedOrder)) continue;
// J is a candidate for merging with I.
if (!PairableInsts.size() ||
@@ -878,6 +1144,14 @@ namespace {
}
CandidatePairs.insert(ValuePair(I, J));
+ if (VTTI)
+ CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J),
+ CostSavings));
+
+ if (FixedOrder == 1)
+ FixedOrderPairs.insert(ValuePair(I, J));
+ else if (FixedOrder == -1)
+ FixedOrderPairs.insert(ValuePair(J, I));
// The next call to this function must start after the last instruction
// selected during this invocation.
@@ -887,7 +1161,8 @@ namespace {
}
DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair "
- << *I << " <-> " << *J << "\n");
+ << *I << " <-> " << *J << " (cost savings: " <<
+ CostSavings << ")\n");
// If we have already found too many pairs, break here and this function
// will be called again starting after the last instruction selected
@@ -915,6 +1190,7 @@ namespace {
std::multimap<Value *, Value *> &CandidatePairs,
std::vector<Value *> &PairableInsts,
std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
ValuePair P) {
StoreInst *SI, *SJ;
@@ -946,12 +1222,18 @@ namespace {
VPIteratorPair JPairRange = CandidatePairs.equal_range(*J);
// Look for <I, J>:
- if (isSecondInIteratorPair<Value*>(*J, IPairRange))
- ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
+ if (isSecondInIteratorPair<Value*>(*J, IPairRange)) {
+ VPPair VP(P, ValuePair(*I, *J));
+ ConnectedPairs.insert(VP);
+ PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect));
+ }
// Look for <J, I>:
- if (isSecondInIteratorPair<Value*>(*I, JPairRange))
- ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I)));
+ if (isSecondInIteratorPair<Value*>(*I, JPairRange)) {
+ VPPair VP(P, ValuePair(*J, *I));
+ ConnectedPairs.insert(VP);
+ PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap));
+ }
}
if (Config.SplatBreaksChain) continue;
@@ -962,8 +1244,11 @@ namespace {
P.first == SJ->getPointerOperand())
continue;
- if (isSecondInIteratorPair<Value*>(*J, IPairRange))
- ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
+ if (isSecondInIteratorPair<Value*>(*J, IPairRange)) {
+ VPPair VP(P, ValuePair(*I, *J));
+ ConnectedPairs.insert(VP);
+ PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
+ }
}
}
@@ -985,8 +1270,11 @@ namespace {
P.second == SJ->getPointerOperand())
continue;
- if (isSecondInIteratorPair<Value*>(*J, IPairRange))
- ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
+ if (isSecondInIteratorPair<Value*>(*J, IPairRange)) {
+ VPPair VP(P, ValuePair(*I, *J));
+ ConnectedPairs.insert(VP);
+ PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
+ }
}
}
}
@@ -997,7 +1285,8 @@ namespace {
void BBVectorize::computeConnectedPairs(
std::multimap<Value *, Value *> &CandidatePairs,
std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs) {
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes) {
for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
PE = PairableInsts.end(); PI != PE; ++PI) {
@@ -1006,7 +1295,7 @@ namespace {
for (std::multimap<Value *, Value *>::iterator P = choiceRange.first;
P != choiceRange.second; ++P)
computePairsConnectedTo(CandidatePairs, PairableInsts,
- ConnectedPairs, *P);
+ ConnectedPairs, PairConnectionTypes, *P);
}
DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size()
@@ -1196,7 +1485,7 @@ namespace {
PrunedTree.insert(QTop.first);
// Visit each child, pruning as necessary...
- DenseMap<ValuePair, size_t> BestChildren;
+ SmallVector<ValuePairWithDepth, 8> BestChildren;
VPPIteratorPair QTopRange = ConnectedPairs.equal_range(QTop.first);
for (std::multimap<ValuePair, ValuePair>::iterator K = QTopRange.first;
K != QTopRange.second; ++K) {
@@ -1228,7 +1517,7 @@ namespace {
DenseSet<ValuePair> CurrentPairs;
bool CanAdd = true;
- for (DenseMap<ValuePair, size_t>::iterator C2
+ for (SmallVector<ValuePairWithDepth, 8>::iterator C2
= BestChildren.begin(), E2 = BestChildren.end();
C2 != E2; ++C2) {
if (C2->first.first == C->first.first ||
@@ -1313,22 +1602,22 @@ namespace {
// to an already-selected child. Check for this here, and if a
// conflict is found, then remove the previously-selected child
// before adding this one in its place.
- for (DenseMap<ValuePair, size_t>::iterator C2
+ for (SmallVector<ValuePairWithDepth, 8>::iterator C2
= BestChildren.begin(); C2 != BestChildren.end();) {
if (C2->first.first == C->first.first ||
C2->first.first == C->first.second ||
C2->first.second == C->first.first ||
C2->first.second == C->first.second ||
pairsConflict(C2->first, C->first, PairableInstUsers))
- BestChildren.erase(C2++);
+ C2 = BestChildren.erase(C2);
else
++C2;
}
- BestChildren.insert(ValuePairWithDepth(C->first, C->second));
+ BestChildren.push_back(ValuePairWithDepth(C->first, C->second));
}
- for (DenseMap<ValuePair, size_t>::iterator C
+ for (SmallVector<ValuePairWithDepth, 8>::iterator C
= BestChildren.begin(), E2 = BestChildren.end();
C != E2; ++C) {
size_t DepthF = getDepthFactor(C->first.first);
@@ -1341,13 +1630,17 @@ namespace {
// pairs, given the choice of root pairs as an iterator range.
void BBVectorize::findBestTreeFor(
std::multimap<Value *, Value *> &CandidatePairs,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
DenseSet<ValuePair> &PairableInstUsers,
std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
DenseMap<Value *, Value *> &ChosenPairs,
DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
- size_t &BestEffSize, VPIteratorPair ChoiceRange,
+ int &BestEffSize, VPIteratorPair ChoiceRange,
bool UseCycleCheck) {
for (std::multimap<Value *, Value *>::iterator J = ChoiceRange.first;
J != ChoiceRange.second; ++J) {
@@ -1397,17 +1690,289 @@ namespace {
PairableInstUsers, PairableInstUserMap, ChosenPairs, Tree,
PrunedTree, *J, UseCycleCheck);
- size_t EffSize = 0;
- for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
- E = PrunedTree.end(); S != E; ++S)
- EffSize += getDepthFactor(S->first);
+ int EffSize = 0;
+ if (VTTI) {
+ DenseSet<Value *> PrunedTreeInstrs;
+ for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
+ E = PrunedTree.end(); S != E; ++S) {
+ PrunedTreeInstrs.insert(S->first);
+ PrunedTreeInstrs.insert(S->second);
+ }
+
+ // The set of pairs that have already contributed to the total cost.
+ DenseSet<ValuePair> IncomingPairs;
+
+ // If the cost model were perfect, this might not be necessary; but we
+ // need to make sure that we don't get stuck vectorizing our own
+ // shuffle chains.
+ bool HasNontrivialInsts = false;
+
+ // The node weights represent the cost savings associated with
+ // fusing the pair of instructions.
+ for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
+ E = PrunedTree.end(); S != E; ++S) {
+ if (!isa<ShuffleVectorInst>(S->first) &&
+ !isa<InsertElementInst>(S->first) &&
+ !isa<ExtractElementInst>(S->first))
+ HasNontrivialInsts = true;
+
+ bool FlipOrder = false;
+
+ if (getDepthFactor(S->first)) {
+ int ESContrib = CandidatePairCostSavings.find(*S)->second;
+ DEBUG(if (DebugPairSelection) dbgs() << "\tweight {"
+ << *S->first << " <-> " << *S->second << "} = " <<
+ ESContrib << "\n");
+ EffSize += ESContrib;
+ }
+
+ // The edge weights contribute in a negative sense: they represent
+ // the cost of shuffles.
+ VPPIteratorPair IP = ConnectedPairDeps.equal_range(*S);
+ if (IP.first != ConnectedPairDeps.end()) {
+ unsigned NumDepsDirect = 0, NumDepsSwap = 0;
+ for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
+ Q != IP.second; ++Q) {
+ if (!PrunedTree.count(Q->second))
+ continue;
+ DenseMap<VPPair, unsigned>::iterator R =
+ PairConnectionTypes.find(VPPair(Q->second, Q->first));
+ assert(R != PairConnectionTypes.end() &&
+ "Cannot find pair connection type");
+ if (R->second == PairConnectionDirect)
+ ++NumDepsDirect;
+ else if (R->second == PairConnectionSwap)
+ ++NumDepsSwap;
+ }
+
+            // If there are more swaps than direct connections, then
+            // the pair order will be flipped during fusion, so the real
+            // number of swaps is min(NumDepsDirect, NumDepsSwap).
+ FlipOrder = !FixedOrderPairs.count(*S) &&
+ ((NumDepsSwap > NumDepsDirect) ||
+ FixedOrderPairs.count(ValuePair(S->second, S->first)));
+
+ for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
+ Q != IP.second; ++Q) {
+ if (!PrunedTree.count(Q->second))
+ continue;
+ DenseMap<VPPair, unsigned>::iterator R =
+ PairConnectionTypes.find(VPPair(Q->second, Q->first));
+ assert(R != PairConnectionTypes.end() &&
+ "Cannot find pair connection type");
+ Type *Ty1 = Q->second.first->getType(),
+ *Ty2 = Q->second.second->getType();
+ Type *VTy = getVecTypeForPair(Ty1, Ty2);
+ if ((R->second == PairConnectionDirect && FlipOrder) ||
+ (R->second == PairConnectionSwap && !FlipOrder) ||
+ R->second == PairConnectionSplat) {
+ int ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, VTy);
+ DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
+ *Q->second.first << " <-> " << *Q->second.second <<
+ "} -> {" <<
+ *S->first << " <-> " << *S->second << "} = " <<
+ ESContrib << "\n");
+ EffSize -= ESContrib;
+ }
+ }
+ }
+
+ // Compute the cost of outgoing edges. We assume that edges outgoing
+ // to shuffles, inserts or extracts can be merged, and so contribute
+ // no additional cost.
+ if (!S->first->getType()->isVoidTy()) {
+ Type *Ty1 = S->first->getType(),
+ *Ty2 = S->second->getType();
+ Type *VTy = getVecTypeForPair(Ty1, Ty2);
+
+ bool NeedsExtraction = false;
+ for (Value::use_iterator I = S->first->use_begin(),
+ IE = S->first->use_end(); I != IE; ++I) {
+ if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
+ // Shuffle can be folded if it has no other input
+ if (isa<UndefValue>(SI->getOperand(1)))
+ continue;
+ }
+ if (isa<ExtractElementInst>(*I))
+ continue;
+ if (PrunedTreeInstrs.count(*I))
+ continue;
+ NeedsExtraction = true;
+ break;
+ }
+
+ if (NeedsExtraction) {
+ int ESContrib;
+ if (Ty1->isVectorTy())
+ ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ Ty1, VTy);
+ else
+ ESContrib = (int) VTTI->getVectorInstrCost(
+ Instruction::ExtractElement, VTy, 0);
+
+ DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
+ *S->first << "} = " << ESContrib << "\n");
+ EffSize -= ESContrib;
+ }
+
+ NeedsExtraction = false;
+ for (Value::use_iterator I = S->second->use_begin(),
+ IE = S->second->use_end(); I != IE; ++I) {
+ if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
+ // Shuffle can be folded if it has no other input
+ if (isa<UndefValue>(SI->getOperand(1)))
+ continue;
+ }
+ if (isa<ExtractElementInst>(*I))
+ continue;
+ if (PrunedTreeInstrs.count(*I))
+ continue;
+ NeedsExtraction = true;
+ break;
+ }
+
+ if (NeedsExtraction) {
+ int ESContrib;
+ if (Ty2->isVectorTy())
+ ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ Ty2, VTy);
+ else
+ ESContrib = (int) VTTI->getVectorInstrCost(
+ Instruction::ExtractElement, VTy, 1);
+ DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
+ *S->second << "} = " << ESContrib << "\n");
+ EffSize -= ESContrib;
+ }
+ }
+
+ // Compute the cost of incoming edges.
+ if (!isa<LoadInst>(S->first) && !isa<StoreInst>(S->first)) {
+ Instruction *S1 = cast<Instruction>(S->first),
+ *S2 = cast<Instruction>(S->second);
+ for (unsigned o = 0; o < S1->getNumOperands(); ++o) {
+ Value *O1 = S1->getOperand(o), *O2 = S2->getOperand(o);
+
+            // Combining constants into vector constants (or small vector
+            // constants into larger ones) is assumed to be free.
+ if (isa<Constant>(O1) && isa<Constant>(O2))
+ continue;
+
+ if (FlipOrder)
+ std::swap(O1, O2);
+
+ ValuePair VP = ValuePair(O1, O2);
+ ValuePair VPR = ValuePair(O2, O1);
+
+ // Internal edges are not handled here.
+ if (PrunedTree.count(VP) || PrunedTree.count(VPR))
+ continue;
+
+ Type *Ty1 = O1->getType(),
+ *Ty2 = O2->getType();
+ Type *VTy = getVecTypeForPair(Ty1, Ty2);
+
+ // Combining vector operations of the same type is also assumed
+ // folded with other operations.
+ if (Ty1 == Ty2) {
+ // If both are insert elements, then both can be widened.
+ InsertElementInst *IEO1 = dyn_cast<InsertElementInst>(O1),
+ *IEO2 = dyn_cast<InsertElementInst>(O2);
+ if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2))
+ continue;
+ // If both are extract elements, and both have the same input
+ // type, then they can be replaced with a shuffle
+ ExtractElementInst *EIO1 = dyn_cast<ExtractElementInst>(O1),
+ *EIO2 = dyn_cast<ExtractElementInst>(O2);
+ if (EIO1 && EIO2 &&
+ EIO1->getOperand(0)->getType() ==
+ EIO2->getOperand(0)->getType())
+ continue;
+ // If both are a shuffle with equal operand types and only two
+          // unique operands, then they can be replaced with a single
+          // shuffle.
+ ShuffleVectorInst *SIO1 = dyn_cast<ShuffleVectorInst>(O1),
+ *SIO2 = dyn_cast<ShuffleVectorInst>(O2);
+ if (SIO1 && SIO2 &&
+ SIO1->getOperand(0)->getType() ==
+ SIO2->getOperand(0)->getType()) {
+ SmallSet<Value *, 4> SIOps;
+ SIOps.insert(SIO1->getOperand(0));
+ SIOps.insert(SIO1->getOperand(1));
+ SIOps.insert(SIO2->getOperand(0));
+ SIOps.insert(SIO2->getOperand(1));
+ if (SIOps.size() <= 2)
+ continue;
+ }
+ }
+
+ int ESContrib;
+ // This pair has already been formed.
+ if (IncomingPairs.count(VP)) {
+ continue;
+ } else if (IncomingPairs.count(VPR)) {
+ ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, VTy);
+ } else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) {
+ ESContrib = (int) VTTI->getVectorInstrCost(
+ Instruction::InsertElement, VTy, 0);
+ ESContrib += (int) VTTI->getVectorInstrCost(
+ Instruction::InsertElement, VTy, 1);
+ } else if (!Ty1->isVectorTy()) {
+ // O1 needs to be inserted into a vector of size O2, and then
+ // both need to be shuffled together.
+ ESContrib = (int) VTTI->getVectorInstrCost(
+ Instruction::InsertElement, Ty2, 0);
+ ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, Ty2);
+ } else if (!Ty2->isVectorTy()) {
+ // O2 needs to be inserted into a vector of size O1, and then
+ // both need to be shuffled together.
+ ESContrib = (int) VTTI->getVectorInstrCost(
+ Instruction::InsertElement, Ty1, 0);
+ ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, Ty1);
+ } else {
+ Type *TyBig = Ty1, *TySmall = Ty2;
+ if (Ty2->getVectorNumElements() > Ty1->getVectorNumElements())
+ std::swap(TyBig, TySmall);
+
+ ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, TyBig);
+ if (TyBig != TySmall)
+ ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
+ TyBig, TySmall);
+ }
+
+ DEBUG(if (DebugPairSelection) dbgs() << "\tcost {"
+ << *O1 << " <-> " << *O2 << "} = " <<
+ ESContrib << "\n");
+ EffSize -= ESContrib;
+ IncomingPairs.insert(VP);
+ }
+ }
+ }
+
+ if (!HasNontrivialInsts) {
+ DEBUG(if (DebugPairSelection) dbgs() <<
+ "\tNo non-trivial instructions in tree;"
+ " override to zero effective size\n");
+ EffSize = 0;
+ }
+ } else {
+ for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
+ E = PrunedTree.end(); S != E; ++S)
+ EffSize += (int) getDepthFactor(S->first);
+ }
DEBUG(if (DebugPairSelection)
dbgs() << "BBV: found pruned Tree for pair {"
<< *J->first << " <-> " << *J->second << "} of depth " <<
MaxDepth << " and size " << PrunedTree.size() <<
" (effective size: " << EffSize << ")\n");
- if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) {
+ if (((VTTI && !UseChainDepthWithTI) ||
+ MaxDepth >= Config.ReqChainDepth) &&
+ EffSize > 0 && EffSize > BestEffSize) {
BestMaxDepth = MaxDepth;
BestEffSize = EffSize;
BestTree = PrunedTree;
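The acceptance test in the condition above can be read as a small predicate. The following is an editorial sketch of that logic, with invented parameter names; it is not code from the pass:

// Accept a candidate tree only if it is deep enough (the depth requirement is
// waived when real target cost data is available and the chain-depth
// heuristic is disabled) and its effective size is positive and better than
// the best tree seen so far.
static bool acceptTree(bool HaveTargetCosts, bool UseChainDepthWithTI,
                       unsigned MaxDepth, unsigned ReqChainDepth,
                       int EffSize, int BestEffSize) {
  bool DepthOK = (HaveTargetCosts && !UseChainDepthWithTI) ||
                 MaxDepth >= ReqChainDepth;
  return DepthOK && EffSize > 0 && EffSize > BestEffSize;
}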
@@ -1419,8 +1984,12 @@ namespace {
// that will be fused into vector instructions.
void BBVectorize::choosePairs(
std::multimap<Value *, Value *> &CandidatePairs,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
DenseSet<ValuePair> &PairableInstUsers,
DenseMap<Value *, Value *>& ChosenPairs) {
bool UseCycleCheck =
@@ -1435,9 +2004,12 @@ namespace {
VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I);
// The best pair to choose and its tree:
- size_t BestMaxDepth = 0, BestEffSize = 0;
+ size_t BestMaxDepth = 0;
+ int BestEffSize = 0;
DenseSet<ValuePair> BestTree;
- findBestTreeFor(CandidatePairs, PairableInsts, ConnectedPairs,
+ findBestTreeFor(CandidatePairs, CandidatePairCostSavings,
+ PairableInsts, FixedOrderPairs, PairConnectionTypes,
+ ConnectedPairs, ConnectedPairDeps,
PairableInstUsers, PairableInstUserMap, ChosenPairs,
BestTree, BestMaxDepth, BestEffSize, ChoiceRange,
UseCycleCheck);
@@ -1490,24 +2062,19 @@ namespace {
// Returns the value that is to be used as the pointer input to the vector
// instruction that fuses I with J.
Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
- Instruction *I, Instruction *J, unsigned o,
- bool FlipMemInputs) {
+ Instruction *I, Instruction *J, unsigned o) {
Value *IPtr, *JPtr;
- unsigned IAlignment, JAlignment;
+ unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
int64_t OffsetInElmts;
- // Note: the analysis might fail here, that is why FlipMemInputs has
+ // Note: the analysis might fail here, that is why the pair order has
// been precomputed (OffsetInElmts must be unused here).
(void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
- OffsetInElmts);
+ IAddressSpace, JAddressSpace,
+ OffsetInElmts, false);
// The pointer value is taken to be the one with the lowest offset.
- Value *VPtr;
- if (!FlipMemInputs) {
- VPtr = IPtr;
- } else {
- VPtr = JPtr;
- }
+ Value *VPtr = IPtr;
Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType();
Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType();
@@ -1515,7 +2082,7 @@ namespace {
Type *VArgPtrType = PointerType::get(VArgType,
cast<PointerType>(IPtr->getType())->getAddressSpace());
return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o),
- /* insert before */ FlipMemInputs ? J : I);
+ /* insert before */ I);
}
void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J,
@@ -1585,23 +2152,12 @@ namespace {
Instruction *J, unsigned o, Value *&LOp,
unsigned numElemL,
Type *ArgTypeL, Type *ArgTypeH,
- unsigned IdxOff) {
+ bool IBeforeJ, unsigned IdxOff) {
bool ExpandedIEChain = false;
if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) {
// If we have a pure insertelement chain, then this can be rewritten
// into a chain that directly builds the larger type.
- bool PureChain = true;
- InsertElementInst *LIENext = LIE;
- do {
- if (!isa<UndefValue>(LIENext->getOperand(0)) &&
- !isa<InsertElementInst>(LIENext->getOperand(0))) {
- PureChain = false;
- break;
- }
- } while ((LIENext =
- dyn_cast<InsertElementInst>(LIENext->getOperand(0))));
-
- if (PureChain) {
+ if (isPureIEChain(LIE)) {
SmallVector<Value *, 8> VectElemts(numElemL,
UndefValue::get(ArgTypeL->getScalarType()));
InsertElementInst *LIENext = LIE;
@@ -1619,8 +2175,9 @@ namespace {
LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i],
ConstantInt::get(Type::getInt32Ty(Context),
i + IdxOff),
- getReplacementName(I, true, o, i+1));
- LIENext->insertBefore(J);
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, i+1));
+ LIENext->insertBefore(IBeforeJ ? J : I);
LIEPrev = LIENext;
}
@@ -1635,7 +2192,7 @@ namespace {
// Returns the value to be used as the specified operand of the vector
// instruction that fuses I with J.
Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I,
- Instruction *J, unsigned o, bool FlipMemInputs) {
+ Instruction *J, unsigned o, bool IBeforeJ) {
Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1);
@@ -1646,12 +2203,6 @@ namespace {
Instruction *L = I, *H = J;
Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ;
- if (FlipMemInputs) {
- L = J;
- H = I;
- ArgTypeL = ArgTypeJ;
- ArgTypeH = ArgTypeI;
- }
unsigned numElemL;
if (ArgTypeL->isVectorTy())
@@ -1804,8 +2355,9 @@ namespace {
Instruction *S =
new ShuffleVectorInst(I1, UndefValue::get(I1T),
ConstantVector::get(Mask),
- getReplacementName(I, true, o));
- S->insertBefore(J);
+ getReplacementName(IBeforeJ ? I : J,
+ true, o));
+ S->insertBefore(IBeforeJ ? J : I);
return S;
}
@@ -1826,8 +2378,9 @@ namespace {
Instruction *NewI1 =
new ShuffleVectorInst(I1, UndefValue::get(I1T),
ConstantVector::get(Mask),
- getReplacementName(I, true, o, 1));
- NewI1->insertBefore(J);
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 1));
+ NewI1->insertBefore(IBeforeJ ? J : I);
I1 = NewI1;
I1T = I2T;
I1Elem = I2Elem;
@@ -1842,8 +2395,9 @@ namespace {
Instruction *NewI2 =
new ShuffleVectorInst(I2, UndefValue::get(I2T),
ConstantVector::get(Mask),
- getReplacementName(I, true, o, 1));
- NewI2->insertBefore(J);
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 1));
+ NewI2->insertBefore(IBeforeJ ? J : I);
I2 = NewI2;
I2T = I1T;
I2Elem = I1Elem;
@@ -1863,8 +2417,8 @@ namespace {
Instruction *NewOp =
new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask),
- getReplacementName(I, true, o));
- NewOp->insertBefore(J);
+ getReplacementName(IBeforeJ ? I : J, true, o));
+ NewOp->insertBefore(IBeforeJ ? J : I);
return NewOp;
}
}
@@ -1872,17 +2426,17 @@ namespace {
Type *ArgType = ArgTypeL;
if (numElemL < numElemH) {
if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH,
- ArgTypeL, VArgType, 1)) {
+ ArgTypeL, VArgType, IBeforeJ, 1)) {
// This is another short-circuit case: we're combining a scalar into
// a vector that is formed by an IE chain. We've just expanded the IE
// chain, now insert the scalar and we're done.
Instruction *S = InsertElementInst::Create(HOp, LOp, CV0,
- getReplacementName(I, true, o));
- S->insertBefore(J);
+ getReplacementName(IBeforeJ ? I : J, true, o));
+ S->insertBefore(IBeforeJ ? J : I);
return S;
} else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL,
- ArgTypeH)) {
+ ArgTypeH, IBeforeJ)) {
// The two vector inputs to the shuffle must be the same length,
// so extend the smaller vector to be the same length as the larger one.
Instruction *NLOp;
@@ -1897,29 +2451,32 @@ namespace {
NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL),
ConstantVector::get(Mask),
- getReplacementName(I, true, o, 1));
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 1));
} else {
NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0,
- getReplacementName(I, true, o, 1));
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 1));
}
- NLOp->insertBefore(J);
+ NLOp->insertBefore(IBeforeJ ? J : I);
LOp = NLOp;
}
ArgType = ArgTypeH;
} else if (numElemL > numElemH) {
if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL,
- ArgTypeH, VArgType)) {
+ ArgTypeH, VArgType, IBeforeJ)) {
Instruction *S =
InsertElementInst::Create(LOp, HOp,
ConstantInt::get(Type::getInt32Ty(Context),
numElemL),
- getReplacementName(I, true, o));
- S->insertBefore(J);
+ getReplacementName(IBeforeJ ? I : J,
+ true, o));
+ S->insertBefore(IBeforeJ ? J : I);
return S;
} else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH,
- ArgTypeL)) {
+ ArgTypeL, IBeforeJ)) {
Instruction *NHOp;
if (numElemH > 1) {
std::vector<Constant *> Mask(numElemL);
@@ -1931,13 +2488,15 @@ namespace {
NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH),
ConstantVector::get(Mask),
- getReplacementName(I, true, o, 1));
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 1));
} else {
NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0,
- getReplacementName(I, true, o, 1));
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 1));
}
- NHOp->insertBefore(J);
+ NHOp->insertBefore(IBeforeJ ? J : I);
HOp = NHOp;
}
}
@@ -1955,19 +2514,21 @@ namespace {
}
Instruction *BV = new ShuffleVectorInst(LOp, HOp,
- ConstantVector::get(Mask),
- getReplacementName(I, true, o));
- BV->insertBefore(J);
+ ConstantVector::get(Mask),
+ getReplacementName(IBeforeJ ? I : J, true, o));
+ BV->insertBefore(IBeforeJ ? J : I);
return BV;
}
Instruction *BV1 = InsertElementInst::Create(
UndefValue::get(VArgType), LOp, CV0,
- getReplacementName(I, true, o, 1));
- BV1->insertBefore(I);
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 1));
+ BV1->insertBefore(IBeforeJ ? J : I);
Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1,
- getReplacementName(I, true, o, 2));
- BV2->insertBefore(J);
+ getReplacementName(IBeforeJ ? I : J,
+ true, o, 2));
+ BV2->insertBefore(IBeforeJ ? J : I);
return BV2;
}
@@ -1976,7 +2537,7 @@ namespace {
void BBVectorize::getReplacementInputsForPair(LLVMContext& Context,
Instruction *I, Instruction *J,
SmallVector<Value *, 3> &ReplacedOperands,
- bool FlipMemInputs) {
+ bool IBeforeJ) {
unsigned NumOperands = I->getNumOperands();
for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
@@ -1985,8 +2546,7 @@ namespace {
if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) {
// This is the pointer for a load/store instruction.
- ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o,
- FlipMemInputs);
+ ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o);
continue;
} else if (isa<CallInst>(I)) {
Function *F = cast<CallInst>(I)->getCalledFunction();
@@ -2014,8 +2574,7 @@ namespace {
continue;
}
- ReplacedOperands[o] =
- getReplacementInput(Context, I, J, o, FlipMemInputs);
+ ReplacedOperands[o] = getReplacementInput(Context, I, J, o, IBeforeJ);
}
}
@@ -2026,8 +2585,7 @@ namespace {
void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
Instruction *J, Instruction *K,
Instruction *&InsertionPt,
- Instruction *&K1, Instruction *&K2,
- bool FlipMemInputs) {
+ Instruction *&K1, Instruction *&K2) {
if (isa<StoreInst>(I)) {
AA->replaceWithNewValue(I, K);
AA->replaceWithNewValue(J, K);
@@ -2057,13 +2615,11 @@ namespace {
}
K1 = new ShuffleVectorInst(K, UndefValue::get(VType),
- ConstantVector::get(
- FlipMemInputs ? Mask2 : Mask1),
+                               ConstantVector::get(Mask1),
getReplacementName(K, false, 1));
} else {
Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
- Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1);
- K1 = ExtractElementInst::Create(K, FlipMemInputs ? CV1 : CV0,
+ K1 = ExtractElementInst::Create(K, CV0,
getReplacementName(K, false, 1));
}
@@ -2075,13 +2631,11 @@ namespace {
}
K2 = new ShuffleVectorInst(K, UndefValue::get(VType),
- ConstantVector::get(
- FlipMemInputs ? Mask1 : Mask2),
+                               ConstantVector::get(Mask2),
getReplacementName(K, false, 2));
} else {
- Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1);
- K2 = ExtractElementInst::Create(K, FlipMemInputs ? CV0 : CV1,
+ K2 = ExtractElementInst::Create(K, CV1,
getReplacementName(K, false, 2));
}
@@ -2181,36 +2735,6 @@ namespace {
}
}
- // As with the aliasing information, SCEV can also change because of
- // vectorization. This information is used to compute relative pointer
- // offsets; the necessary information will be cached here prior to
- // fusion.
- void BBVectorize::collectPtrInfo(std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseSet<Value *> &LowPtrInsts) {
- for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
- PIE = PairableInsts.end(); PI != PIE; ++PI) {
- DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI);
- if (P == ChosenPairs.end()) continue;
-
- Instruction *I = cast<Instruction>(P->first);
- Instruction *J = cast<Instruction>(P->second);
-
- if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
- continue;
-
- Value *IPtr, *JPtr;
- unsigned IAlignment, JAlignment;
- int64_t OffsetInElmts;
- if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
- OffsetInElmts) || abs64(OffsetInElmts) != 1)
- llvm_unreachable("Pre-fusion pointer analysis failed");
-
- Value *LowPI = (OffsetInElmts > 0) ? I : J;
- LowPtrInsts.insert(LowPI);
- }
- }
-
// When the first instruction in each pair is cloned, it will inherit its
// parent's metadata. This metadata must be combined with that of the other
// instruction in a safe way.
@@ -2244,27 +2768,27 @@ namespace {
// second member).
void BBVectorize::fuseChosenPairs(BasicBlock &BB,
std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *> &ChosenPairs) {
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairDeps) {
LLVMContext& Context = BB.getContext();
// During the vectorization process, the order of the pairs to be fused
// could be flipped. So we'll add each pair, flipped, into the ChosenPairs
// list. After a pair is fused, the flipped pair is removed from the list.
- std::vector<ValuePair> FlippedPairs;
- FlippedPairs.reserve(ChosenPairs.size());
+ DenseSet<ValuePair> FlippedPairs;
for (DenseMap<Value *, Value *>::iterator P = ChosenPairs.begin(),
E = ChosenPairs.end(); P != E; ++P)
- FlippedPairs.push_back(ValuePair(P->second, P->first));
- for (std::vector<ValuePair>::iterator P = FlippedPairs.begin(),
+ FlippedPairs.insert(ValuePair(P->second, P->first));
+ for (DenseSet<ValuePair>::iterator P = FlippedPairs.begin(),
E = FlippedPairs.end(); P != E; ++P)
ChosenPairs.insert(*P);
std::multimap<Value *, Value *> LoadMoveSet;
collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet);
- DenseSet<Value *> LowPtrInsts;
- collectPtrInfo(PairableInsts, ChosenPairs, LowPtrInsts);
-
DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
@@ -2304,44 +2828,92 @@ namespace {
continue;
}
- bool FlipMemInputs = false;
- if (isa<LoadInst>(I) || isa<StoreInst>(I))
- FlipMemInputs = (LowPtrInsts.find(I) == LowPtrInsts.end());
+ // If the pair must have the other order, then flip it.
+ bool FlipPairOrder = FixedOrderPairs.count(ValuePair(J, I));
+ if (!FlipPairOrder && !FixedOrderPairs.count(ValuePair(I, J))) {
+ // This pair does not have a fixed order, and so we might want to
+ // flip it if that will yield fewer shuffles. We count the number
+ // of dependencies connected via swaps, and those directly connected,
+ // and flip the order if the number of swaps is greater.
+ bool OrigOrder = true;
+ VPPIteratorPair IP = ConnectedPairDeps.equal_range(ValuePair(I, J));
+ if (IP.first == ConnectedPairDeps.end()) {
+ IP = ConnectedPairDeps.equal_range(ValuePair(J, I));
+ OrigOrder = false;
+ }
+ if (IP.first != ConnectedPairDeps.end()) {
+ unsigned NumDepsDirect = 0, NumDepsSwap = 0;
+ for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
+ Q != IP.second; ++Q) {
+ DenseMap<VPPair, unsigned>::iterator R =
+ PairConnectionTypes.find(VPPair(Q->second, Q->first));
+ assert(R != PairConnectionTypes.end() &&
+ "Cannot find pair connection type");
+ if (R->second == PairConnectionDirect)
+ ++NumDepsDirect;
+ else if (R->second == PairConnectionSwap)
+ ++NumDepsSwap;
+ }
+
+ if (!OrigOrder)
+ std::swap(NumDepsDirect, NumDepsSwap);
+
+ if (NumDepsSwap > NumDepsDirect) {
+ FlipPairOrder = true;
+ DEBUG(dbgs() << "BBV: reordering pair: " << *I <<
+ " <-> " << *J << "\n");
+ }
+ }
+ }
+
+ Instruction *L = I, *H = J;
+ if (FlipPairOrder)
+ std::swap(H, L);
+
+ // If the pair being fused uses the opposite order from that in the pair
+ // connection map, then we need to flip the types.
+ VPPIteratorPair IP = ConnectedPairs.equal_range(ValuePair(H, L));
+ for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
+ Q != IP.second; ++Q) {
+ DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(*Q);
+ assert(R != PairConnectionTypes.end() &&
+ "Cannot find pair connection type");
+ if (R->second == PairConnectionDirect)
+ R->second = PairConnectionSwap;
+ else if (R->second == PairConnectionSwap)
+ R->second = PairConnectionDirect;
+ }
+
+ bool LBeforeH = !FlipPairOrder;
unsigned NumOperands = I->getNumOperands();
SmallVector<Value *, 3> ReplacedOperands(NumOperands);
- getReplacementInputsForPair(Context, I, J, ReplacedOperands,
- FlipMemInputs);
+ getReplacementInputsForPair(Context, L, H, ReplacedOperands,
+ LBeforeH);
// Make a copy of the original operation, change its type to the vector
// type and replace its operands with the vector operands.
- Instruction *K = I->clone();
- if (I->hasName()) K->takeName(I);
+ Instruction *K = L->clone();
+ if (L->hasName())
+ K->takeName(L);
+ else if (H->hasName())
+ K->takeName(H);
if (!isa<StoreInst>(K))
- K->mutateType(getVecTypeForPair(I->getType(), J->getType()));
+ K->mutateType(getVecTypeForPair(L->getType(), H->getType()));
- combineMetadata(K, J);
+ combineMetadata(K, H);
+ K->intersectOptionalDataWith(H);
for (unsigned o = 0; o < NumOperands; ++o)
K->setOperand(o, ReplacedOperands[o]);
- // If we've flipped the memory inputs, make sure that we take the correct
- // alignment.
- if (FlipMemInputs) {
- if (isa<StoreInst>(K))
- cast<StoreInst>(K)->setAlignment(cast<StoreInst>(J)->getAlignment());
- else
- cast<LoadInst>(K)->setAlignment(cast<LoadInst>(J)->getAlignment());
- }
-
K->insertAfter(J);
// Instruction insertion point:
Instruction *InsertionPt = K;
Instruction *K1 = 0, *K2 = 0;
- replaceOutputsOfPair(Context, I, J, K, InsertionPt, K1, K2,
- FlipMemInputs);
+ replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2);
// The use tree of the first original instruction must be moved to after
// the location of the second instruction. The entire use tree of the
@@ -2351,10 +2923,10 @@ namespace {
moveUsesOfIAfterJ(BB, LoadMoveSet, InsertionPt, I, J);
if (!isa<StoreInst>(I)) {
- I->replaceAllUsesWith(K1);
- J->replaceAllUsesWith(K2);
- AA->replaceWithNewValue(I, K1);
- AA->replaceWithNewValue(J, K2);
+ L->replaceAllUsesWith(K1);
+ H->replaceAllUsesWith(K2);
+ AA->replaceWithNewValue(L, K1);
+ AA->replaceWithNewValue(H, K2);
}
// Instructions that may read from memory may be in the load move set.
@@ -2387,6 +2959,9 @@ namespace {
SE->forgetValue(J);
I->eraseFromParent();
J->eraseFromParent();
+
+ DEBUG(if (PrintAfterEveryPair) dbgs() << "BBV: block is now: \n" <<
+ BB << "\n");
}
DEBUG(dbgs() << "BBV: final: \n" << BB << "\n");
@@ -2397,6 +2972,7 @@ char BBVectorize::ID = 0;
static const char bb_vectorize_name[] = "Basic-Block Vectorization";
INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
new file mode 100644
index 0000000..a7ef248
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -0,0 +1,1941 @@
+//===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
+// and generates target-independent LLVM-IR. Legalization of the IR is done
+// in the codegen. However, the vectorizer uses (will use) the codegen
+// interfaces to generate IR that is likely to result in an optimal binary.
+//
+// The loop vectorizer combines consecutive loop iterations into a single
+// 'wide' iteration. After this transformation the index is incremented
+// by the SIMD vector width, and not by one.
+//
+// This pass has four parts:
+// 1. The main loop pass that drives the different parts.
+// 2. LoopVectorizationLegality - A unit that checks for the legality
+// of the vectorization.
+// 3. SingleBlockLoopVectorizer - A unit that performs the actual
+// widening of instructions.
+// 4. LoopVectorizationCostModel - A unit that checks for the profitability
+// of vectorization. It decides on the optimal vector width, which
+// can be one, if vectorization is not profitable.
+//===----------------------------------------------------------------------===//
+//
+// The reduction-variable vectorization is based on the paper:
+// D. Nuzman and R. Henderson. Multi-platform Auto-vectorization.
+//
+// Variable uniformity checks are inspired by:
+// Karrenberg, R. and Hack, S. Whole Function Vectorization.
+//
+// Other ideas/concepts are from:
+// A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
+//
+//===----------------------------------------------------------------------===//
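As a rough illustration of the transformation described above (a conceptual C++ sketch, not code from this file), consecutive scalar iterations are folded into one wide iteration and the index advances by the vectorization factor:

// Scalar loop: one element per iteration.
void scalarLoop(float *A, const float *B, int N) {
  for (int i = 0; i < N; ++i)
    A[i] += B[i];
}

// What the vectorized form is morally equivalent to, for VF = 4 and N a
// multiple of 4: the index is incremented by the SIMD width, not by one.
void widenedLoop(float *A, const float *B, int N) {
  for (int i = 0; i < N; i += 4)
    for (int Lane = 0; Lane < 4; ++Lane)
      A[i + Lane] += B[i + Lane];
}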
+#define LV_NAME "loop-vectorize"
+#define DEBUG_TYPE LV_NAME
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Value.h"
+#include "llvm/Function.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/TargetTransformInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<unsigned>
+VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
+ cl::desc("Set the default vectorization width. Zero is autoselect."));
+
+/// We don't vectorize loops with a known constant trip count below this number.
+const unsigned TinyTripCountThreshold = 16;
+
+/// When performing a runtime memory check, do not check more than this
+/// number of pointers. Notice that the check is quadratic!
+const unsigned RuntimeMemoryCheckThreshold = 2;
+
+namespace {
+
+// Forward declarations.
+class LoopVectorizationLegality;
+class LoopVectorizationCostModel;
+
+/// SingleBlockLoopVectorizer vectorizes loops which contain only one basic
+/// block to a specified vectorization factor (VF).
+/// This class performs the widening of scalars into vectors, or multiple
+/// scalars. This class also implements the following features:
+/// * It inserts an epilogue loop for handling loops that don't have iteration
+/// counts that are known to be a multiple of the vectorization factor.
+/// * It handles the code generation for reduction variables.
+/// * Scalarization (implementation using scalars) of un-vectorizable
+/// instructions.
+/// SingleBlockLoopVectorizer does not perform any vectorization-legality
+/// checks, and relies on the caller to check for the different legality
+/// aspects. The SingleBlockLoopVectorizer relies on the
+/// LoopVectorizationLegality class to provide information about the induction
+/// and reduction variables that were found in the loop.
+class SingleBlockLoopVectorizer {
+public:
+ /// Ctor.
+ SingleBlockLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li,
+ DominatorTree *dt, LPPassManager *Lpm,
+ unsigned VecWidth):
+ OrigLoop(Orig), SE(Se), LI(Li), DT(dt), LPM(Lpm), VF(VecWidth),
+ Builder(Se->getContext()), Induction(0), OldInduction(0) { }
+
+ // Perform the actual loop widening (vectorization).
+ void vectorize(LoopVectorizationLegality *Legal) {
+ ///Create a new empty loop. Unlink the old loop and connect the new one.
+ createEmptyLoop(Legal);
+ /// Widen each instruction in the old loop to a new one in the new loop.
+ /// Use the Legality module to find the induction and reduction variables.
+ vectorizeLoop(Legal);
+ // Register the new loop and update the analysis passes.
+ updateAnalysis();
+ }
+
+private:
+ /// Create an empty loop, based on the loop ranges of the old loop.
+ void createEmptyLoop(LoopVectorizationLegality *Legal);
+ /// Copy and widen the instructions from the old loop.
+ void vectorizeLoop(LoopVectorizationLegality *Legal);
+ /// Insert the new loop to the loop hierarchy and pass manager
+ /// and update the analysis passes.
+ void updateAnalysis();
+
+ /// This instruction is un-vectorizable. Implement it as a sequence
+ /// of scalars.
+ void scalarizeInstruction(Instruction *Instr);
+
+ /// Create a broadcast instruction. This method generates a broadcast
+ /// instruction (shuffle) for loop invariant values and for the induction
+ /// value. If this is the induction variable then we extend it to N, N+1, ...
+ /// this is needed because each iteration in the loop corresponds to a SIMD
+ /// element.
+ Value *getBroadcastInstrs(Value *V);
+
+  /// This is a helper function used by getBroadcastInstrs. It adds 0, 1, 2, ...
+  /// to the lanes of the vector, starting from zero.
+ Value *getConsecutiveVector(Value* Val);
+
+ /// When we go over instructions in the basic block we rely on previous
+ /// values within the current basic block or on loop invariant values.
+ /// When we widen (vectorize) values we place them in the map. If the values
+ /// are not within the map, they have to be loop invariant, so we simply
+ /// broadcast them into a vector.
+ Value *getVectorValue(Value *V);
+
+ /// Get a uniform vector of constant integers. We use this to get
+ /// vectors of ones and zeros for the reduction code.
+ Constant* getUniformVector(unsigned Val, Type* ScalarTy);
+
+ typedef DenseMap<Value*, Value*> ValueMap;
+
+ /// The original loop.
+ Loop *OrigLoop;
+ // Scev analysis to use.
+ ScalarEvolution *SE;
+ // Loop Info.
+ LoopInfo *LI;
+ // Dominator Tree.
+ DominatorTree *DT;
+  // Loop Pass Manager.
+ LPPassManager *LPM;
+ // The vectorization factor to use.
+ unsigned VF;
+
+ // The builder that we use
+ IRBuilder<> Builder;
+
+ // --- Vectorization state ---
+
+ /// The vector-loop preheader.
+ BasicBlock *LoopVectorPreHeader;
+ /// The scalar-loop preheader.
+ BasicBlock *LoopScalarPreHeader;
+ /// Middle Block between the vector and the scalar.
+ BasicBlock *LoopMiddleBlock;
+ ///The ExitBlock of the scalar loop.
+ BasicBlock *LoopExitBlock;
+ ///The vector loop body.
+ BasicBlock *LoopVectorBody;
+ ///The scalar loop body.
+ BasicBlock *LoopScalarBody;
+ ///The first bypass block.
+ BasicBlock *LoopBypassBlock;
+
+ /// The new Induction variable which was added to the new block.
+ PHINode *Induction;
+ /// The induction variable of the old basic block.
+ PHINode *OldInduction;
+ // Maps scalars to widened vectors.
+ ValueMap WidenMap;
+};
+
+/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
+/// to what vectorization factor.
+/// This class does not look at the profitability of vectorization, only the
+/// legality. This class has two main kinds of checks:
+/// * Memory checks - The code in canVectorizeMemory checks if vectorization
+/// will change the order of memory accesses in a way that will change the
+/// correctness of the program.
+/// * Scalars checks - The code in canVectorizeBlock checks for a number
+/// of different conditions, such as the availability of a single induction
+/// variable, that all types are supported and vectorize-able, etc.
+/// This code reflects the capabilities of SingleBlockLoopVectorizer.
+/// This class is also used by SingleBlockLoopVectorizer for identifying
+/// induction variable and the different reduction variables.
+class LoopVectorizationLegality {
+public:
+ LoopVectorizationLegality(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl):
+ TheLoop(Lp), SE(Se), DL(Dl), Induction(0) { }
+
+ /// This represents the kinds of reductions that we support.
+ enum ReductionKind {
+ NoReduction, /// Not a reduction.
+ IntegerAdd, /// Sum of numbers.
+ IntegerMult, /// Product of numbers.
+ IntegerOr, /// Bitwise or logical OR of numbers.
+ IntegerAnd, /// Bitwise or logical AND of numbers.
+ IntegerXor /// Bitwise or logical XOR of numbers.
+ };
+
+ /// This POD struct holds information about reduction variables.
+ struct ReductionDescriptor {
+ // Default C'tor
+ ReductionDescriptor():
+ StartValue(0), LoopExitInstr(0), Kind(NoReduction) {}
+
+ // C'tor.
+ ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K):
+ StartValue(Start), LoopExitInstr(Exit), Kind(K) {}
+
+ // The starting value of the reduction.
+ // It does not have to be zero!
+ Value *StartValue;
+    // The instruction whose value is used outside the loop.
+ Instruction *LoopExitInstr;
+ // The kind of the reduction.
+ ReductionKind Kind;
+ };
+
+ // This POD struct holds information about the memory runtime legality
+ // check that a group of pointers do not overlap.
+ struct RuntimePointerCheck {
+ /// This flag indicates if we need to add the runtime check.
+ bool Need;
+ /// Holds the pointers that we need to check.
+ SmallVector<Value*, 2> Pointers;
+ };
+
+ /// ReductionList contains the reduction descriptors for all
+ /// of the reductions that were found in the loop.
+ typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
+
+ /// Returns true if it is legal to vectorize this loop.
+ /// This does not mean that it is profitable to vectorize this
+ /// loop, only that it is legal to do so.
+ bool canVectorize();
+
+ /// Returns the Induction variable.
+ PHINode *getInduction() {return Induction;}
+
+ /// Returns the reduction variables found in the loop.
+ ReductionList *getReductionVars() { return &Reductions; }
+
+ /// Check if the pointer returned by this GEP is consecutive
+ /// when the index is vectorized. This happens when the last
+ /// index of the GEP is consecutive, like the induction variable.
+ /// This check allows us to vectorize A[idx] into a wide load/store.
+ bool isConsecutiveGep(Value *Ptr);
+
+ /// Returns true if the value V is uniform within the loop.
+ bool isUniform(Value *V);
+
+ /// Returns true if this instruction will remain scalar after vectorization.
+ bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);}
+
+ /// Returns the information that we collected about runtime memory check.
+ RuntimePointerCheck *getRuntimePointerCheck() {return &PtrRtCheck; }
+private:
+ /// Check if a single basic block loop is vectorizable.
+ /// At this point we know that this is a loop with a constant trip count
+ /// and we only need to check individual instructions.
+ bool canVectorizeBlock(BasicBlock &BB);
+
+ /// When we vectorize loops we may change the order in which
+ /// we read and write from memory. This method checks if it is
+  /// legal to vectorize the code, considering only memory constraints.
+  /// Returns true if BB is vectorizable.
+ bool canVectorizeMemory(BasicBlock &BB);
+
+  /// Returns true if 'Phi' is the kind of reduction variable for type
+ /// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
+ bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
+ /// Returns true if the instruction I can be a reduction variable of type
+ /// 'Kind'.
+ bool isReductionInstr(Instruction *I, ReductionKind Kind);
+  /// Returns true if 'Phi' is an induction variable.
+ bool isInductionVariable(PHINode *Phi);
+  /// Return true if we can compute the address bounds of Ptr within the loop.
+ bool hasComputableBounds(Value *Ptr);
+
+ /// The loop that we evaluate.
+ Loop *TheLoop;
+ /// Scev analysis.
+ ScalarEvolution *SE;
+ /// DataLayout analysis.
+ DataLayout *DL;
+
+ // --- vectorization state --- //
+
+ /// Holds the induction variable.
+ PHINode *Induction;
+ /// Holds the reduction variables.
+ ReductionList Reductions;
+ /// Allowed outside users. This holds the reduction
+ /// vars which can be accessed from outside the loop.
+ SmallPtrSet<Value*, 4> AllowedExit;
+ /// This set holds the variables which are known to be uniform after
+ /// vectorization.
+ SmallPtrSet<Instruction*, 4> Uniforms;
+ /// We need to check that all of the pointers in this list are disjoint
+ /// at runtime.
+ RuntimePointerCheck PtrRtCheck;
+};
+
+/// LoopVectorizationCostModel - estimates the expected speedups due to
+/// vectorization.
+/// In many cases vectorization is not profitable. This can happen because
+/// of a number of reasons. In this class we mainly attempt to predict
+/// the expected speedup/slowdowns due to the supported instruction set.
+/// We use the VectorTargetTransformInfo to query the different backends
+/// for the cost of different operations.
+class LoopVectorizationCostModel {
+public:
+ /// C'tor.
+ LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
+ LoopVectorizationLegality *Leg,
+ const VectorTargetTransformInfo *Vtti):
+ TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
+
+ /// Returns the most profitable vectorization factor for the loop that is
+ /// smaller or equal to the VF argument. This method checks every power
+ /// of two up to VF.
+ unsigned findBestVectorizationFactor(unsigned VF = 8);
+
+private:
+ /// Returns the expected execution cost. The unit of the cost does
+ /// not matter because we use the 'cost' units to compare different
+ /// vector widths. The cost that is returned is *not* normalized by
+  /// the vectorization factor.
+ unsigned expectedCost(unsigned VF);
+
+ /// Returns the execution time cost of an instruction for a given vector
+ /// width. Vector width of one means scalar.
+ unsigned getInstructionCost(Instruction *I, unsigned VF);
+
+ /// A helper function for converting Scalar types to vector types.
+ /// If the incoming type is void, we return void. If the VF is 1, we return
+ /// the scalar type.
+ static Type* ToVectorTy(Type *Scalar, unsigned VF);
+
+ /// The loop that we evaluate.
+ Loop *TheLoop;
+ /// Scev analysis.
+ ScalarEvolution *SE;
+
+ /// Vectorization legality.
+ LoopVectorizationLegality *Legal;
+ /// Vector target information.
+ const VectorTargetTransformInfo *VTTI;
+};
+
+struct LoopVectorize : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ LoopVectorize() : LoopPass(ID) {
+ initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
+ }
+
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ LoopInfo *LI;
+ TargetTransformInfo *TTI;
+ DominatorTree *DT;
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
+ // We only vectorize innermost loops.
+ if (!L->empty())
+ return false;
+
+ SE = &getAnalysis<ScalarEvolution>();
+ DL = getAnalysisIfAvailable<DataLayout>();
+ LI = &getAnalysis<LoopInfo>();
+ TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+ DT = &getAnalysis<DominatorTree>();
+
+ DEBUG(dbgs() << "LV: Checking a loop in \"" <<
+ L->getHeader()->getParent()->getName() << "\"\n");
+
+ // Check if it is legal to vectorize the loop.
+ LoopVectorizationLegality LVL(L, SE, DL);
+ if (!LVL.canVectorize()) {
+ DEBUG(dbgs() << "LV: Not vectorizing.\n");
+ return false;
+ }
+
+    // Select the preferred vectorization factor.
+ unsigned VF = 1;
+ if (VectorizationFactor == 0) {
+ const VectorTargetTransformInfo *VTTI = 0;
+ if (TTI)
+ VTTI = TTI->getVectorTargetTransformInfo();
+ // Use the cost model.
+ LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
+ VF = CM.findBestVectorizationFactor();
+
+ if (VF == 1) {
+ DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
+ return false;
+ }
+
+ } else {
+ // Use the user command flag.
+ VF = VectorizationFactor;
+ }
+
+ DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<<
+ L->getHeader()->getParent()->getParent()->getModuleIdentifier()<<
+ "\n");
+
+    // If we decided that it is *legal* to vectorize the loop, then do it.
+ SingleBlockLoopVectorizer LB(L, SE, LI, DT, &LPM, VF);
+ LB.vectorize(&LVL);
+
+ DEBUG(verifyFunction(*L->getHeader()->getParent()));
+ return true;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ LoopPass::getAnalysisUsage(AU);
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreserved<DominatorTree>();
+ }
+
+};
+
+Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) {
+ // Instructions that access the old induction variable
+ // actually want to get the new one.
+ if (V == OldInduction)
+ V = Induction;
+ // Create the types.
+ LLVMContext &C = V->getContext();
+ Type *VTy = VectorType::get(V->getType(), VF);
+ Type *I32 = IntegerType::getInt32Ty(C);
+ Constant *Zero = ConstantInt::get(I32, 0);
+ Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32, VF));
+ Value *UndefVal = UndefValue::get(VTy);
+ // Insert the value into a new vector.
+ Value *SingleElem = Builder.CreateInsertElement(UndefVal, V, Zero);
+ // Broadcast the scalar into all locations in the vector.
+ Value *Shuf = Builder.CreateShuffleVector(SingleElem, UndefVal, Zeros,
+ "broadcast");
+ // We are accessing the induction variable. Make sure to promote the
+ // index for each consecutive SIMD lane. This adds 0,1,2 ... to all lanes.
+ if (V == Induction)
+ return getConsecutiveVector(Shuf);
+ return Shuf;
+}
+
+Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) {
+ assert(Val->getType()->isVectorTy() && "Must be a vector");
+ assert(Val->getType()->getScalarType()->isIntegerTy() &&
+ "Elem must be an integer");
+ // Create the types.
+ Type *ITy = Val->getType()->getScalarType();
+ VectorType *Ty = cast<VectorType>(Val->getType());
+ unsigned VLen = Ty->getNumElements();
+ SmallVector<Constant*, 8> Indices;
+
+ // Create a vector of consecutive numbers from zero to VF.
+ for (unsigned i = 0; i < VLen; ++i)
+ Indices.push_back(ConstantInt::get(ITy, i));
+
+ // Add the consecutive indices to the vector value.
+ Constant *Cv = ConstantVector::get(Indices);
+ assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
+ return Builder.CreateAdd(Val, Cv, "induction");
+}
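For intuition, when the broadcast value is the induction variable these two helpers give each SIMD lane its own index. A standalone scalar sketch of the arithmetic (illustration only; the pass emits the equivalent vector IR):

// With VF = 4 and a current scalar index of 8, the broadcast produces
// <8, 8, 8, 8>; adding the consecutive constants <0, 1, 2, 3> yields the
// per-lane indices <8, 9, 10, 11>.
void perLaneIndices(unsigned Induction, unsigned Lanes[4]) {
  for (unsigned L = 0; L != 4; ++L)
    Lanes[L] = Induction + L;   // broadcast <I,I,I,I>, then add <0,1,2,3>
}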
+
+bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) {
+ GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
+ if (!Gep)
+ return false;
+
+ unsigned NumOperands = Gep->getNumOperands();
+ Value *LastIndex = Gep->getOperand(NumOperands - 1);
+
+ // Check that all of the gep indices are uniform except for the last.
+ for (unsigned i = 0; i < NumOperands - 1; ++i)
+ if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ return false;
+
+  // We can emit wide load/stores only if the last index is the induction
+ // variable.
+ const SCEV *Last = SE->getSCEV(LastIndex);
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // The memory is consecutive because the last index is consecutive
+ // and all other indices are loop invariant.
+ if (Step->isOne())
+ return true;
+ }
+
+ return false;
+}
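For example (hypothetical loops, only for illustration): in the first function the GEP for &A[i] is consecutive, because every index but the last is loop invariant and the last one advances by exactly one per iteration; in the second it is not, so it cannot become a simple wide load/store:

void consecutiveAccess(int *A, int N) {
  for (int i = 0; i < N; ++i)
    A[i] = 0;        // last GEP index steps by 1: a wide store is possible
}

void stridedAccess(int *A, int N) {
  for (int i = 0; i < N; ++i)
    A[2 * i] = 0;    // step of 2: isConsecutiveGep() returns false
}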
+
+bool LoopVectorizationLegality::isUniform(Value *V) {
+ return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+}
+
+Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) {
+ assert(!V->getType()->isVectorTy() && "Can't widen a vector");
+ // If we saved a vectorized copy of V, use it.
+ Value *&MapEntry = WidenMap[V];
+ if (MapEntry)
+ return MapEntry;
+
+ // Broadcast V and save the value for future uses.
+ Value *B = getBroadcastInstrs(V);
+ MapEntry = B;
+ return B;
+}
+
+Constant*
+SingleBlockLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) {
+ SmallVector<Constant*, 8> Indices;
+  // Create a vector of VF copies of the requested value.
+ for (unsigned i = 0; i < VF; ++i)
+ Indices.push_back(ConstantInt::get(ScalarTy, Val, true));
+
+  // Build and return the constant vector.
+ return ConstantVector::get(Indices);
+}
+
+void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
+ assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
+ // Holds vector parameters or scalars, in case of uniform vals.
+ SmallVector<Value*, 8> Params;
+
+ // Find all of the vectorized parameters.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *SrcOp = Instr->getOperand(op);
+
+ // If we are accessing the old induction variable, use the new one.
+ if (SrcOp == OldInduction) {
+ Params.push_back(getBroadcastInstrs(Induction));
+ continue;
+ }
+
+ // Try using previously calculated values.
+ Instruction *SrcInst = dyn_cast<Instruction>(SrcOp);
+
+ // If the src is an instruction that appeared earlier in the basic block
+ // then it should already be vectorized.
+ if (SrcInst && SrcInst->getParent() == Instr->getParent()) {
+ assert(WidenMap.count(SrcInst) && "Source operand is unavailable");
+ // The parameter is a vector value from earlier.
+ Params.push_back(WidenMap[SrcInst]);
+ } else {
+ // The parameter is a scalar from outside the loop. Maybe even a constant.
+ Params.push_back(SrcOp);
+ }
+ }
+
+ assert(Params.size() == Instr->getNumOperands() &&
+ "Invalid number of operands");
+
+ // Does this instruction return a value ?
+ bool IsVoidRetTy = Instr->getType()->isVoidTy();
+ Value *VecResults = 0;
+
+ // If we have a return value, create an empty vector. We place the scalarized
+ // instructions in this vector.
+ if (!IsVoidRetTy)
+ VecResults = UndefValue::get(VectorType::get(Instr->getType(), VF));
+
+ // For each scalar that we create:
+ for (unsigned i = 0; i < VF; ++i) {
+ Instruction *Cloned = Instr->clone();
+ if (!IsVoidRetTy)
+ Cloned->setName(Instr->getName() + ".cloned");
+    // Replace the operands of the cloned instructions with extracted scalars.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *Op = Params[op];
+ // Param is a vector. Need to extract the right lane.
+ if (Op->getType()->isVectorTy())
+ Op = Builder.CreateExtractElement(Op, Builder.getInt32(i));
+ Cloned->setOperand(op, Op);
+ }
+
+ // Place the cloned scalar in the new loop.
+ Builder.Insert(Cloned);
+
+ // If the original scalar returns a value we need to place it in a vector
+ // so that future users will be able to use it.
+ if (!IsVoidRetTy)
+ VecResults = Builder.CreateInsertElement(VecResults, Cloned,
+ Builder.getInt32(i));
+ }
+
+ if (!IsVoidRetTy)
+ WidenMap[Instr] = VecResults;
+}
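In effect, scalarization emulates the wide operation lane by lane. A plain C++ sketch of what the emitted sequence computes for a 4-wide unsigned division (illustrative only; the real code emits extractelement/insertelement IR):

void scalarizedUDiv(const unsigned A[4], const unsigned B[4], unsigned R[4]) {
  for (unsigned Lane = 0; Lane != 4; ++Lane)
    R[Lane] = A[Lane] / B[Lane];   // one cloned scalar instruction per lane
}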
+
+void
+SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
+ /*
+ In this function we generate a new loop. The new loop will contain
+ the vectorized instructions while the old loop will continue to run the
+ scalar remainder.
+
+ [ ] <-- vector loop bypass.
+ / |
+ / v
+| [ ] <-- vector pre header.
+| |
+| v
+| [ ] \
+| [ ]_| <-- vector loop.
+| |
+ \ v
+ >[ ] <--- middle-block.
+ / |
+ / v
+| [ ] <--- new preheader.
+| |
+| v
+| [ ] \
+| [ ]_| <-- old scalar loop to handle remainder.
+ \ |
+ \ v
+ >[ ] <-- exit block.
+ ...
+ */
+
+ OldInduction = Legal->getInduction();
+ assert(OldInduction && "We must have a single phi node.");
+ Type *IdxTy = OldInduction->getType();
+
+ // Find the loop boundaries.
+ const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
+ assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
+
+ // Get the total trip count from the count by adding 1.
+ ExitCount = SE->getAddExpr(ExitCount,
+ SE->getConstant(ExitCount->getType(), 1));
+ // We may need to extend the index in case there is a type mismatch.
+ // We know that the count starts at zero and does not overflow.
+ // We are using Zext because it should be less expensive.
+ if (ExitCount->getType() != IdxTy)
+ ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy);
+
+ // This is the original scalar-loop preheader.
+ BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
+ BasicBlock *ExitBlock = OrigLoop->getExitBlock();
+ assert(ExitBlock && "Must have an exit block");
+
+ // The loop index does not have to start at Zero. It starts with this value.
+ Value *StartIdx = OldInduction->getIncomingValueForBlock(BypassBlock);
+
+ assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop");
+ assert(BypassBlock && "Invalid loop structure");
+
+ BasicBlock *VectorPH =
+ BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph");
+ BasicBlock *VecBody = VectorPH->splitBasicBlock(VectorPH->getTerminator(),
+ "vector.body");
+
+ BasicBlock *MiddleBlock = VecBody->splitBasicBlock(VecBody->getTerminator(),
+ "middle.block");
+ BasicBlock *ScalarPH =
+ MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(),
+ "scalar.preheader");
+ // Find the induction variable.
+ BasicBlock *OldBasicBlock = OrigLoop->getHeader();
+
+ // Use this IR builder to create the loop instructions (Phi, Br, Cmp)
+ // inside the loop.
+ Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
+
+ // Generate the induction variable.
+ Induction = Builder.CreatePHI(IdxTy, 2, "index");
+ Constant *Step = ConstantInt::get(IdxTy, VF);
+
+ // Expand the trip count and place the new instructions in the preheader.
+ // Notice that the pre-header does not change, only the loop body.
+ SCEVExpander Exp(*SE, "induction");
+ Instruction *Loc = BypassBlock->getTerminator();
+
+ // Count holds the overall loop count (N).
+ Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc);
+
+ // Add the start index to the loop count to get the new end index.
+ Value *IdxEnd = BinaryOperator::CreateAdd(Count, StartIdx, "end.idx", Loc);
+
+ // Now we need to generate the expression for N - (N % VF), which is
+ // the part that the vectorized body will execute.
+ Constant *CIVF = ConstantInt::get(IdxTy, VF);
+ Value *R = BinaryOperator::CreateURem(Count, CIVF, "n.mod.vf", Loc);
+ Value *CountRoundDown = BinaryOperator::CreateSub(Count, R, "n.vec", Loc);
+ Value *IdxEndRoundDown = BinaryOperator::CreateAdd(CountRoundDown, StartIdx,
+ "end.idx.rnd.down", Loc);
+
+ // Now, compare the new count to zero. If it is zero, jump to the scalar part.
+ Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
+ IdxEndRoundDown,
+ StartIdx,
+ "cmp.zero", Loc);
+
+ LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
+ Legal->getRuntimePointerCheck();
+ Value *MemoryRuntimeCheck = 0;
+ if (PtrRtCheck->Need) {
+ unsigned NumPointers = PtrRtCheck->Pointers.size();
+ SmallVector<Value* , 2> Starts;
+ SmallVector<Value* , 2> Ends;
+
+ // Use this type for pointer arithmetic.
+ Type* PtrArithTy = PtrRtCheck->Pointers[0]->getType();
+
+ for (unsigned i=0; i < NumPointers; ++i) {
+ Value *Ptr = PtrRtCheck->Pointers[i];
+ const SCEV *Sc = SE->getSCEV(Ptr);
+
+ if (SE->isLoopInvariant(Sc, OrigLoop)) {
+        DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
+ *Ptr <<"\n");
+ Starts.push_back(Ptr);
+ Ends.push_back(Ptr);
+ } else {
+ DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+ Value *Start = Exp.expandCodeFor(AR->getStart(), PtrArithTy, Loc);
+ const SCEV *Ex = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
+ const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
+ assert(!isa<SCEVCouldNotCompute>(ScEnd) && "Invalid scev range.");
+ Value *End = Exp.expandCodeFor(ScEnd, PtrArithTy, Loc);
+ Starts.push_back(Start);
+ Ends.push_back(End);
+ }
+ }
+
+ for (unsigned i=0; i < NumPointers; ++i) {
+ for (unsigned j=i+1; j < NumPointers; ++j) {
+        Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
+                                      Starts[i], Ends[j], "bound0", Loc);
+        Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
+                                      Starts[j], Ends[i], "bound1", Loc);
+ Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1,
+ "found.conflict", Loc);
+ if (MemoryRuntimeCheck) {
+ MemoryRuntimeCheck = BinaryOperator::Create(Instruction::Or,
+ MemoryRuntimeCheck,
+ IsConflict,
+ "conflict.rdx", Loc);
+ } else {
+ MemoryRuntimeCheck = IsConflict;
+ }
+ }
+ }
+ }// end of need-runtime-check code.
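The per-pair conflict test generated above is the usual interval-overlap predicate on the [Start, End] ranges of the two pointers. A plain C++ sketch of that predicate (for illustration; it is not the code the pass emits):

// Two accessed ranges may overlap exactly when each one starts no later than
// the other one ends; with N checked pointers this is evaluated for all
// N*(N-1)/2 pairs.
bool mayConflict(const char *StartA, const char *EndA,
                 const char *StartB, const char *EndB) {
  return StartA <= EndB && StartB <= EndA;
}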
+
+  // If we are using memory runtime checks, fold them into the condition.
+ if (MemoryRuntimeCheck) {
+ Cmp = BinaryOperator::Create(Instruction::Or, Cmp, MemoryRuntimeCheck,
+ "CntOrMem", Loc);
+ }
+
+ BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc);
+ // Remove the old terminator.
+ Loc->eraseFromParent();
+
+ // We are going to resume the execution of the scalar loop.
+ // This PHI decides on what number to start. If we come from the
+ // vector loop then we need to start with the end index minus the
+ // index modulo VF. If we come from a bypass edge then we need to start
+ // from the real start.
+ PHINode* ResumeIndex = PHINode::Create(IdxTy, 2, "resume.idx",
+ MiddleBlock->getTerminator());
+ ResumeIndex->addIncoming(StartIdx, BypassBlock);
+ ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
+
+ // Add a check in the middle block to see if we have completed
+ // all of the iterations in the first vector loop.
+ // If (N - N%VF) == N, then we *don't* need to run the remainder.
+ Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd,
+ ResumeIndex, "cmp.n",
+ MiddleBlock->getTerminator());
+
+ BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator());
+ // Remove the old terminator.
+ MiddleBlock->getTerminator()->eraseFromParent();
+
+ // Create i+1 and fill the PHINode.
+ Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next");
+ Induction->addIncoming(StartIdx, VectorPH);
+ Induction->addIncoming(NextIdx, VecBody);
+ // Create the compare.
+ Value *ICmp = Builder.CreateICmpEQ(NextIdx, IdxEndRoundDown);
+ Builder.CreateCondBr(ICmp, MiddleBlock, VecBody);
+
+ // Now we have two terminators. Remove the old one from the block.
+ VecBody->getTerminator()->eraseFromParent();
+
+ // Fix the scalar body iteration count.
+ unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH);
+ OldInduction->setIncomingValue(BlockIdx, ResumeIndex);
+
+ // Get ready to start creating new instructions into the vectorized body.
+ Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
+
+ // Register the new loop.
+ Loop* Lp = new Loop();
+ LPM->insertLoop(Lp, OrigLoop->getParentLoop());
+
+ Lp->addBasicBlockToLoop(VecBody, LI->getBase());
+
+ Loop *ParentLoop = OrigLoop->getParentLoop();
+ if (ParentLoop) {
+ ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase());
+ }
+
+ // Save the state.
+ LoopVectorPreHeader = VectorPH;
+ LoopScalarPreHeader = ScalarPH;
+ LoopMiddleBlock = MiddleBlock;
+ LoopExitBlock = ExitBlock;
+ LoopVectorBody = VecBody;
+ LoopScalarBody = OldBasicBlock;
+ LoopBypassBlock = BypassBlock;
+}
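A worked example of the index math above (editorial illustration): for a trip count N = 10 and VF = 4, CountRoundDown is N - (N % VF) = 8, so the vector body runs two wide iterations covering indices 0..7, and the scalar remainder loop resumes at index 8 for the last two iterations.

// The portion of the trip count executed by the vector body.
unsigned vectorTripCount(unsigned Count, unsigned VF) {
  return Count - (Count % VF);   // "n.vec" / CountRoundDown in the code above
}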
+
+/// This function returns the identity element (or neutral element) for
+/// the operation K.
+static unsigned
+getReductionIdentity(LoopVectorizationLegality::ReductionKind K) {
+ switch (K) {
+ case LoopVectorizationLegality::IntegerXor:
+ case LoopVectorizationLegality::IntegerAdd:
+ case LoopVectorizationLegality::IntegerOr:
+ // Adding, Xoring, Oring zero to a number does not change it.
+ return 0;
+ case LoopVectorizationLegality::IntegerMult:
+ // Multiplying a number by 1 does not change it.
+ return 1;
+ case LoopVectorizationLegality::IntegerAnd:
+ // AND-ing a number with an all-1 value does not change it.
+ return -1;
+ default:
+ llvm_unreachable("Unknown reduction kind");
+ }
+}
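The identity element is what the extra lanes of the vector accumulator are seeded with, so that only the real start value influences the final horizontal reduction. A scalar sketch of an integer-add reduction at VF = 4 (illustration only, assuming the trip count is a multiple of 4):

int vectorizedSumExample(const int *A, unsigned N, int Start) {
  int Acc[4] = {Start, 0, 0, 0};      // identity for integer add is 0
  for (unsigned i = 0; i < N; i += 4)
    for (unsigned Lane = 0; Lane != 4; ++Lane)
      Acc[Lane] += A[i + Lane];
  // Horizontal reduction of the four partial sums.
  return Acc[0] + Acc[1] + Acc[2] + Acc[3];
}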
+
+void
+SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
+ //===------------------------------------------------===//
+ //
+  // Notice: any optimization or new instruction that goes
+  // into the code below should also be implemented in
+ // the cost-model.
+ //
+ //===------------------------------------------------===//
+ typedef SmallVector<PHINode*, 4> PhiVector;
+ BasicBlock &BB = *OrigLoop->getHeader();
+ Constant *Zero = ConstantInt::get(
+ IntegerType::getInt32Ty(BB.getContext()), 0);
+
+ // In order to support reduction variables we need to be able to vectorize
+ // Phi nodes. Phi nodes have cycles, so we need to vectorize them in two
+  // stages. First, we create a new vector PHI node with no incoming edges.
+ // We use this value when we vectorize all of the instructions that use the
+ // PHI. Next, after all of the instructions in the block are complete we
+ // add the new incoming edges to the PHI. At this point all of the
+ // instructions in the basic block are vectorized, so we can use them to
+ // construct the PHI.
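+ // For example (illustrative): for a reduction such as 'sum += A[i]', the
+ // scalar 'sum' PHI first becomes an empty 'vec.phi' vector PHI, and its
+ // two incoming values are wired up only after the body has been widened.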
+ PhiVector PHIsToFix;
+
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
+ Instruction *Inst = it;
+
+ switch (Inst->getOpcode()) {
+ case Instruction::Br:
+ // Nothing to do for PHIs and BR, since we already took care of the
+ // loop control flow instructions.
+ continue;
+ case Instruction::PHI:{
+ PHINode* P = cast<PHINode>(Inst);
+ // Special handling for the induction var.
+ if (OldInduction == Inst)
+ continue;
+ // This is phase one of vectorizing PHIs.
+ // This has to be a reduction variable.
+ assert(Legal->getReductionVars()->count(P) && "Not a Reduction");
+ Type *VecTy = VectorType::get(Inst->getType(), VF);
+ WidenMap[Inst] = Builder.CreatePHI(VecTy, 2, "vec.phi");
+ PHIsToFix.push_back(P);
+ continue;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // Just widen binops.
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
+ Value *A = getVectorValue(Inst->getOperand(0));
+ Value *B = getVectorValue(Inst->getOperand(1));
+
+ // Use this vector value for all users of the original instruction.
+ Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
+ WidenMap[Inst] = V;
+
+ // Update the NSW, NUW and Exact flags.
+ BinaryOperator *VecOp = cast<BinaryOperator>(V);
+ if (isa<OverflowingBinaryOperator>(BinOp)) {
+ VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
+ VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
+ }
+ if (isa<PossiblyExactOperator>(VecOp))
+ VecOp->setIsExact(BinOp->isExact());
+ break;
+ }
+ case Instruction::Select: {
+ // Widen selects.
+ // If the selector is loop invariant we can create a select
+ // instruction with a scalar condition. Otherwise, use vector-select.
+ Value *Cond = Inst->getOperand(0);
+ bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop);
+
+ // The condition can be loop invariant but still defined inside the
+ // loop. This means that we can't just use the original 'cond' value.
+ // We have to take the 'vectorized' value and pick the first lane.
+ // Instcombine will make this a no-op.
+ Cond = getVectorValue(Cond);
+ if (InvariantCond)
+ Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0));
+
+ Value *Op0 = getVectorValue(Inst->getOperand(1));
+ Value *Op1 = getVectorValue(Inst->getOperand(2));
+ WidenMap[Inst] = Builder.CreateSelect(Cond, Op0, Op1);
+ break;
+ }
+
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ // Widen compares. Generate vector compares.
+ bool FCmp = (Inst->getOpcode() == Instruction::FCmp);
+ CmpInst *Cmp = dyn_cast<CmpInst>(Inst);
+ Value *A = getVectorValue(Inst->getOperand(0));
+ Value *B = getVectorValue(Inst->getOperand(1));
+ if (FCmp)
+ WidenMap[Inst] = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
+ else
+ WidenMap[Inst] = Builder.CreateICmp(Cmp->getPredicate(), A, B);
+ break;
+ }
+
+ case Instruction::Store: {
+ // Attempt to issue a wide store.
+ StoreInst *SI = dyn_cast<StoreInst>(Inst);
+ Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF);
+ Value *Ptr = SI->getPointerOperand();
+ unsigned Alignment = SI->getAlignment();
+
+ assert(!Legal->isUniform(Ptr) &&
+ "We do not allow storing to uniform addresses");
+
+ GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+
+ // If the pointer is not a GEP with a consecutive last index, we cannot
+ // issue a wide store; scalarize this store instead.
+ if (!Legal->isConsecutiveGep(Gep)) {
+ scalarizeInstruction(Inst);
+ break;
+ }
+
+ // The last index does not have to be the induction. It can be
+ // consecutive and be a function of the index. For example A[I+1];
+ unsigned NumOperands = Gep->getNumOperands();
+ Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
+ LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ Gep2->setOperand(NumOperands - 1, LastIndex);
+ Ptr = Builder.Insert(Gep2);
+ Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo());
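+ // For VF = 4 and i32 elements, for example, the original i32* pointer is
+ // reinterpreted as a <4 x i32>* so that a single wide store can be issued.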
+ Value *Val = getVectorValue(SI->getValueOperand());
+ Builder.CreateStore(Val, Ptr)->setAlignment(Alignment);
+ break;
+ }
+ case Instruction::Load: {
+ // Attempt to issue a wide load.
+ LoadInst *LI = dyn_cast<LoadInst>(Inst);
+ Type *RetTy = VectorType::get(LI->getType(), VF);
+ Value *Ptr = LI->getPointerOperand();
+ unsigned Alignment = LI->getAlignment();
+ GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+
+ // If we don't have a GEP, if the pointer is loop invariant, or if the
+ // GEP is not consecutive, scalarize the load.
+ if (!Gep || Legal->isUniform(Gep) || !Legal->isConsecutiveGep(Gep)) {
+ scalarizeInstruction(Inst);
+ break;
+ }
+
+ // The last index does not have to be the induction. It can be
+ // consecutive and be a function of the index. For example A[I+1];
+ unsigned NumOperands = Gep->getNumOperands();
+ Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
+ LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ Gep2->setOperand(NumOperands - 1, LastIndex);
+ Ptr = Builder.Insert(Gep2);
+ Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo());
+ LI = Builder.CreateLoad(Ptr);
+ LI->setAlignment(Alignment);
+ // Use this vector value for all users of the load.
+ WidenMap[Inst] = LI;
+ break;
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ // Vectorize casts.
+ CastInst *CI = dyn_cast<CastInst>(Inst);
+ Value *A = getVectorValue(Inst->getOperand(0));
+ Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
+ WidenMap[Inst] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
+ break;
+ }
+
+ default:
+ // All other instructions are unsupported. Scalarize them.
+ scalarizeInstruction(Inst);
+ break;
+ }// end of switch.
+ }// end of for_each instr.
+
+ // At this point every instruction in the original loop is widened to
+ // a vector form. We are almost done. Now, we need to fix the PHI nodes
+ // that we vectorized. The PHI nodes are currently empty because we did
+ // not want to introduce cycles. Notice that the remaining PHI nodes
+ // that we need to fix are reduction variables.
+
+ // Create the 'reduced' values for each of the reduction variables.
+ // The reduced values are the vector values that we scalarize and combine
+ // after the loop is finished.
+ for (PhiVector::iterator it = PHIsToFix.begin(), e = PHIsToFix.end();
+ it != e; ++it) {
+ PHINode *RdxPhi = *it;
+ PHINode *VecRdxPhi = dyn_cast<PHINode>(WidenMap[RdxPhi]);
+ assert(VecRdxPhi && "Unable to recover vectorized PHI");
+
+ // Find the reduction variable descriptor.
+ assert(Legal->getReductionVars()->count(RdxPhi) &&
+ "Unable to find the reduction variable");
+ LoopVectorizationLegality::ReductionDescriptor RdxDesc =
+ (*Legal->getReductionVars())[RdxPhi];
+
+ // We need to generate a reduction vector from the incoming scalar.
+ // To do so, we need to generate the 'identity' vector and override
+ // one of the elements with the incoming scalar reduction. We need
+ // to do it in the vector-loop preheader.
+ Builder.SetInsertPoint(LoopBypassBlock->getTerminator());
+
+ // This is the vector-clone of the value that leaves the loop.
+ Value *VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
+ Type *VecTy = VectorExit->getType();
+
+ // Find the reduction identity element: zero for addition, or and xor;
+ // one for multiplication; all-ones for and.
+ Constant *Identity = getUniformVector(getReductionIdentity(RdxDesc.Kind),
+ VecTy->getScalarType());
+
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ Value *VectorStart = Builder.CreateInsertElement(Identity,
+ RdxDesc.StartValue, Zero);
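+ // For example (illustrative): for an integer ADD reduction with VF = 4 and
+ // incoming scalar start value S, VectorStart is <S, 0, 0, 0>, so the extra
+ // lanes contribute the identity and do not affect the final result.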
+
+
+ // Fix the vector-loop phi.
+ // We created the induction variable so we know that the
+ // preheader is the first entry.
+ BasicBlock *VecPreheader = Induction->getIncomingBlock(0);
+
+ // Reductions do not have to start at zero. They can start with
+ // any loop invariant values.
+ VecRdxPhi->addIncoming(VectorStart, VecPreheader);
+ unsigned SelfEdgeIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
+ Value *Val = getVectorValue(RdxPhi->getIncomingValue(SelfEdgeIdx));
+ VecRdxPhi->addIncoming(Val, LoopVectorBody);
+
+ // Before each round, move the insertion point right between
+ // the PHIs and the values we are going to write.
+ // This allows us to write both PHINodes and the extractelement
+ // instructions.
+ Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
+
+ // This PHINode contains the vectorized reduction variable, or
+ // the initial value vector, if we bypass the vector loop.
+ PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
+ NewPhi->addIncoming(VectorStart, LoopBypassBlock);
+ NewPhi->addIncoming(getVectorValue(RdxDesc.LoopExitInstr), LoopVectorBody);
+
+ // Extract the first scalar.
+ Value *Scalar0 =
+ Builder.CreateExtractElement(NewPhi, Builder.getInt32(0));
+ // Extract and reduce the remaining vector elements.
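+ // For an ADD reduction with VF = 4, for instance, this computes
+ // Scalar0 = lane0 + lane1 + lane2 + lane3 of the exit vector.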
+ for (unsigned i=1; i < VF; ++i) {
+ Value *Scalar1 =
+ Builder.CreateExtractElement(NewPhi, Builder.getInt32(i));
+ switch (RdxDesc.Kind) {
+ case LoopVectorizationLegality::IntegerAdd:
+ Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerMult:
+ Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerOr:
+ Scalar0 = Builder.CreateOr(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerAnd:
+ Scalar0 = Builder.CreateAnd(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerXor:
+ Scalar0 = Builder.CreateXor(Scalar0, Scalar1);
+ break;
+ default:
+ llvm_unreachable("Unknown reduction operation");
+ }
+ }
+
+ // Now, we need to fix the users of the reduction variable
+ // inside and outside of the scalar remainder loop.
+ // We know that the loop is in LCSSA form. We need to update the
+ // PHI nodes in the exit blocks.
+ for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
+ LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
+ PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
+ if (!LCSSAPhi) continue;
+
+ // All PHINodes need to have a single entry edge, or two if
+ // we already fixed them.
+ assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
+
+ // We found our reduction value exit-PHI. Update it with the
+ // incoming bypass edge.
+ if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
+ // Add an edge coming from the bypass.
+ LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock);
+ break;
+ }
+ }// end of the LCSSA phi scan.
+
+ // Fix the scalar loop reduction variable with the incoming reduction sum
+ // from the vector body and from the backedge value.
+ int IncomingEdgeBlockIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
+ int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); // The other block.
+ (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
+ (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
+ }// end of for each redux variable.
+}
+
+void SingleBlockLoopVectorizer::updateAnalysis() {
+ // Forget everything ScalarEvolution knows about the original loop.
+ SE->forgetLoop(OrigLoop);
+
+ // Update the dominator tree information.
+ assert(DT->properlyDominates(LoopBypassBlock, LoopExitBlock) &&
+ "Entry does not dominate exit.");
+
+ DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlock);
+ DT->addNewBlock(LoopVectorBody, LoopVectorPreHeader);
+ DT->addNewBlock(LoopMiddleBlock, LoopBypassBlock);
+ DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock);
+ DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
+ DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
+
+ DEBUG(DT->verifyAnalysis());
+}
+
+bool LoopVectorizationLegality::canVectorize() {
+ if (!TheLoop->getLoopPreheader()) {
+ assert(false && "No preheader!!");
+ DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
+ return false;
+ }
+
+ // We can only vectorize single basic block loops.
+ unsigned NumBlocks = TheLoop->getNumBlocks();
+ if (NumBlocks != 1) {
+ DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n");
+ return false;
+ }
+
+ // We need to have a loop header.
+ BasicBlock *BB = TheLoop->getHeader();
+ DEBUG(dbgs() << "LV: Found a loop: " << BB->getName() << "\n");
+
+ // ScalarEvolution needs to be able to find the exit count.
+ const SCEV *ExitCount = SE->getExitCount(TheLoop, BB);
+ if (ExitCount == SE->getCouldNotCompute()) {
+ DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
+ return false;
+ }
+
+ // Do not loop-vectorize loops with a tiny trip count.
+ unsigned TC = SE->getSmallConstantTripCount(TheLoop, BB);
+ if (TC > 0u && TC < TinyTripCountThreshold) {
+ DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " <<
+ "This loop is not worth vectorizing.\n");
+ return false;
+ }
+
+ // Go over each instruction and look at memory deps.
+ if (!canVectorizeBlock(*BB)) {
+ DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "LV: We can vectorize this loop" <<
+ (PtrRtCheck.Need ? " (with a runtime bound check)" : "")
+ <<"!\n");
+
+ // Okay! We can vectorize. At this point we don't have any other mem analysis
+ // which may limit our maximum vectorization factor, so just return true with
+ // no restrictions.
+ return true;
+}
+
+bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
+ // Scan the instructions in the block and look for hazards.
+ for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
+ Instruction *I = it;
+
+ PHINode *Phi = dyn_cast<PHINode>(I);
+ if (Phi) {
+ // This should not happen because the loop should be normalized.
+ if (Phi->getNumIncomingValues() != 2) {
+ DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+ return false;
+ }
+ // We only look at integer phi nodes.
+ if (!Phi->getType()->isIntegerTy()) {
+ DEBUG(dbgs() << "LV: Found an non-int PHI.\n");
+ return false;
+ }
+
+ if (isInductionVariable(Phi)) {
+ if (Induction) {
+ DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
+ return false;
+ }
+ DEBUG(dbgs() << "LV: Found the induction PHI."<< *Phi <<"\n");
+ Induction = Phi;
+ continue;
+ }
+ if (AddReductionVar(Phi, IntegerAdd)) {
+ DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, IntegerMult)) {
+ DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, IntegerOr)) {
+ DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, IntegerAnd)) {
+ DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, IntegerXor)) {
+ DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+
+ DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
+ return false;
+ }// end of PHI handling
+
+ // We still don't handle functions.
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (CI) {
+ DEBUG(dbgs() << "LV: Found a call site.\n");
+ return false;
+ }
+
+ // We do not re-vectorize vectors.
+ if (!VectorType::isValidElementType(I->getType()) &&
+ !I->getType()->isVoidTy()) {
+ DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
+ return false;
+ }
+
+ // Reduction instructions are allowed to have exit users.
+ // All other instructions must not have external users.
+ if (!AllowedExit.count(I))
+ // Check that all of the users of the instruction are inside the BB.
+ for (Value::use_iterator it = I->use_begin(), e = I->use_end();
+ it != e; ++it) {
+ Instruction *U = cast<Instruction>(*it);
+ // This user may be a reduction exit value.
+ BasicBlock *Parent = U->getParent();
+ if (Parent != &BB) {
+ DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+ return false;
+ }
+ }
+ } // next instr.
+
+ if (!Induction) {
+ DEBUG(dbgs() << "LV: Did not find an induction var.\n");
+ return false;
+ }
+
+ // Don't vectorize if the memory dependencies do not allow vectorization.
+ if (!canVectorizeMemory(BB))
+ return false;
+
+ // We now know that the loop is vectorizable!
+ // Collect variables that will remain uniform after vectorization.
+ std::vector<Value*> Worklist;
+
+ // Start with the conditional branch and walk up the block.
+ Worklist.push_back(BB.getTerminator()->getOperand(0));
+
+ while (Worklist.size()) {
+ Instruction *I = dyn_cast<Instruction>(Worklist.back());
+ Worklist.pop_back();
+ // Look at instructions inside this block.
+ if (!I) continue;
+ if (I->getParent() != &BB) continue;
+
+ // Stop when reaching PHI nodes.
+ if (isa<PHINode>(I)) {
+ assert(I == Induction && "Found a uniform PHI that is not the induction");
+ break;
+ }
+
+ // This is a known uniform.
+ Uniforms.insert(I);
+
+ // Insert all operands.
+ for (int i=0, Op = I->getNumOperands(); i < Op; ++i) {
+ Worklist.push_back(I->getOperand(i));
+ }
+ }
+
+ return true;
+}
+
+bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
+ typedef SmallVector<Value*, 16> ValueVector;
+ typedef SmallPtrSet<Value*, 16> ValueSet;
+ // Holds the Load and Store *instructions*.
+ ValueVector Loads;
+ ValueVector Stores;
+ PtrRtCheck.Pointers.clear();
+ PtrRtCheck.Need = false;
+
+ // Scan the BB and collect legal loads and stores.
+ for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
+ Instruction *I = it;
+
+ // If this is a load, save it. If this instruction can read from memory
+ // but is not a load, then we quit. Notice that we don't handle function
+ // calls that read or write.
+ if (I->mayReadFromMemory()) {
+ LoadInst *Ld = dyn_cast<LoadInst>(I);
+ if (!Ld) return false;
+ if (!Ld->isSimple()) {
+ DEBUG(dbgs() << "LV: Found a non-simple load.\n");
+ return false;
+ }
+ Loads.push_back(Ld);
+ continue;
+ }
+
+ // Save store instructions. Abort if other instructions write to memory.
+ if (I->mayWriteToMemory()) {
+ StoreInst *St = dyn_cast<StoreInst>(I);
+ if (!St) return false;
+ if (!St->isSimple()) {
+ DEBUG(dbgs() << "LV: Found a non-simple store.\n");
+ return false;
+ }
+ Stores.push_back(St);
+ }
+ } // next instr.
+
+ // Now we have two lists that hold the loads and the stores.
+ // Next, we find the pointers that they use.
+
+ // Check if we see any stores. If there are no stores, then we don't
+ // care if the pointers are *restrict*.
+ if (!Stores.size()) {
+ DEBUG(dbgs() << "LV: Found a read-only loop!\n");
+ return true;
+ }
+
+ // Holds the read and read-write *pointers* that we find.
+ ValueVector Reads;
+ ValueVector ReadWrites;
+
+ // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
+ // multiple times on the same object. If the ptr is accessed twice, once
+ // for read and once for write, it will only appear once (on the write
+ // list). This is okay, since we are going to check for conflicts between
+ // writes and between reads and writes, but not between reads and reads.
+ ValueSet Seen;
+
+ ValueVector::iterator I, IE;
+ for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
+ StoreInst *ST = dyn_cast<StoreInst>(*I);
+ assert(ST && "Bad StoreInst");
+ Value* Ptr = ST->getPointerOperand();
+
+ if (isUniform(Ptr)) {
+ DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+ return false;
+ }
+
+ // If we did *not* see this pointer before, insert it into
+ // the read-write list. At this phase it is only a 'write' list.
+ if (Seen.insert(Ptr))
+ ReadWrites.push_back(Ptr);
+ }
+
+ for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
+ LoadInst *LD = dyn_cast<LoadInst>(*I);
+ assert(LD && "Bad LoadInst");
+ Value* Ptr = LD->getPointerOperand();
+ // If we did *not* see this pointer before, insert it into the
+ // read list. If we *did* see it before, then it is already in
+ // the read-write list. This allows us to vectorize expressions
+ // such as A[i] += x, because the address of A[i] is a read-write
+ // pointer. This only works if the index of A[i] is consecutive.
+ // If the address of i is unknown (for example A[B[i]]) then we may
+ // read a few words, modify, and write a few words, and some of the
+ // words may be written to the same address.
+ if (Seen.insert(Ptr) || !isConsecutiveGep(Ptr))
+ Reads.push_back(Ptr);
+ }
+
+ // If we write (or read-write) to a single destination and there are no
+ // other reads in this loop, then it is safe to vectorize.
+ if (ReadWrites.size() == 1 && Reads.size() == 0) {
+ DEBUG(dbgs() << "LV: Found a write-only loop!\n");
+ return true;
+ }
+
+ // Find pointers with computable bounds. We are going to use this information
+ // to place a runtime bound check.
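+ // Illustrative: for a loop that writes A[i] and reads B[i], the bounds
+ // collected here allow a runtime check that compares the accessed ranges
+ // of A and B and falls back to the scalar loop if they may overlap.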
+ bool RT = true;
+ for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I)
+ if (hasComputableBounds(*I)) {
+ PtrRtCheck.Pointers.push_back(*I);
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
+ } else {
+ RT = false;
+ break;
+ }
+ for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I)
+ if (hasComputableBounds(*I)) {
+ PtrRtCheck.Pointers.push_back(*I);
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
+ } else {
+ RT = false;
+ break;
+ }
+
+ // Check that we did not collect too many pointers or find a pointer
+ // without computable bounds.
+ if (!RT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
+ PtrRtCheck.Pointers.clear();
+ RT = false;
+ }
+
+ PtrRtCheck.Need = RT;
+
+ if (RT) {
+ DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
+ }
+
+ // Now that the pointers are in two lists (Reads and ReadWrites), we
+ // can check that there are no conflicts between each of the writes and
+ // between the writes and the reads.
+ ValueSet WriteObjects;
+ ValueVector TempObjects;
+
+ // Check that the read-writes do not conflict with other read-write
+ // pointers.
+ for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I) {
+ GetUnderlyingObjects(*I, TempObjects, DL);
+ for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
+ it != e; ++it) {
+ if (!isIdentifiedObject(*it)) {
+ DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n");
+ return RT;
+ }
+ if (!WriteObjects.insert(*it)) {
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
+ << **it <<"\n");
+ return RT;
+ }
+ }
+ TempObjects.clear();
+ }
+
+ // Check that the reads don't conflict with the read-writes.
+ for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I) {
+ GetUnderlyingObjects(*I, TempObjects, DL);
+ for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
+ it != e; ++it) {
+ if (!isIdentifiedObject(*it)) {
+ DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n");
+ return RT;
+ }
+ if (WriteObjects.count(*it)) {
+ DEBUG(dbgs() << "LV: Found a possible read/write reorder:"
+ << **it <<"\n");
+ return RT;
+ }
+ }
+ TempObjects.clear();
+ }
+
+ // It is safe to vectorize and we don't need any runtime checks.
+ DEBUG(dbgs() << "LV: We don't need a runtime memory check.\n");
+ PtrRtCheck.Pointers.clear();
+ PtrRtCheck.Need = false;
+ return true;
+}
+
+bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
+ ReductionKind Kind) {
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Find the possible incoming reduction variable.
+ BasicBlock *BB = Phi->getParent();
+ int SelfEdgeIdx = Phi->getBasicBlockIndex(BB);
+ int InEdgeBlockIdx = (SelfEdgeIdx ? 0 : 1); // The other entry.
+ Value *RdxStart = Phi->getIncomingValue(InEdgeBlockIdx);
+
+ // ExitInstruction is the single value which is used outside the loop.
+ // We only allow for a single reduction value to be used outside the loop.
+ // Only one value in the reduction cycle (which ends in the PHI node)
+ // may have users outside of the loop.
+ Instruction *ExitInstruction = 0;
+
+ // Iter is our iterator. We start with the PHI node and scan for all of the
+ // users of this instruction. All users must be instructions which can be
+ // used as reduction variables (such as ADD). We may have a single
+ // out-of-block user. The cycle must end with the original PHI.
+ // Also, we can't have multiple block-local users.
+ Instruction *Iter = Phi;
+ while (true) {
+ // Any reduction instr must be of one of the allowed kinds.
+ if (!isReductionInstr(Iter, Kind))
+ return false;
+
+ // Did we find a user inside this block?
+ bool FoundInBlockUser = false;
+ // Did we reach the initial PHI node?
+ bool FoundStartPHI = false;
+
+ // If the instruction has no users then this is a broken
+ // chain and can't be a reduction variable.
+ if (Iter->use_empty())
+ return false;
+
+ // For each of the *users* of iter.
+ for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end();
+ it != e; ++it) {
+ Instruction *U = cast<Instruction>(*it);
+ // We already know that the PHI is a user.
+ if (U == Phi) {
+ FoundStartPHI = true;
+ continue;
+ }
+ // Check if we found the exit user.
+ BasicBlock *Parent = U->getParent();
+ if (Parent != BB) {
+ // We must have a single exit instruction.
+ if (ExitInstruction != 0)
+ return false;
+ ExitInstruction = Iter;
+ }
+ // We can't have multiple inside users.
+ if (FoundInBlockUser)
+ return false;
+ FoundInBlockUser = true;
+ Iter = U;
+ }
+
+ // We found a reduction var if we have reached the original
+ // phi node and we only have a single instruction with out-of-loop
+ // users.
+ if (FoundStartPHI && ExitInstruction) {
+ // This instruction is allowed to have out-of-loop users.
+ AllowedExit.insert(ExitInstruction);
+
+ // Save the description of this reduction variable.
+ ReductionDescriptor RD(RdxStart, ExitInstruction, Kind);
+ Reductions[Phi] = RD;
+ return true;
+ }
+ }
+}
+
+bool
+LoopVectorizationLegality::isReductionInstr(Instruction *I,
+ ReductionKind Kind) {
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case Instruction::PHI:
+ // possibly.
+ return true;
+ case Instruction::Add:
+ case Instruction::Sub:
+ return Kind == IntegerAdd;
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ return Kind == IntegerMult;
+ case Instruction::And:
+ return Kind == IntegerAnd;
+ case Instruction::Or:
+ return Kind == IntegerOr;
+ case Instruction::Xor:
+ return Kind == IntegerXor;
+ }
+}
+
+bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
+ // Check that the PHI is consecutive and starts at zero.
+ const SCEV *PhiScev = SE->getSCEV(Phi);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+ if (!AR) {
+ DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ if (!Step->isOne()) {
+ DEBUG(dbgs() << "LV: PHI stride does not equal one.\n");
+ return false;
+ }
+ return true;
+}
+
+bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
+ const SCEV *PhiScev = SE->getSCEV(Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+ if (!AR)
+ return false;
+
+ return AR->isAffine();
+}
+
+unsigned
+LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
+ if (!VTTI) {
+ DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n");
+ return 1;
+ }
+
+ float Cost = expectedCost(1);
+ unsigned Width = 1;
+ DEBUG(dbgs() << "LV: Scalar loop costs: "<< (int)Cost << ".\n");
+ for (unsigned i=2; i <= VF; i*=2) {
+ // Notice that the vector loop needs to be executed fewer times, so
+ // we need to divide the cost of the vector loop by the width of
+ // the vector elements.
+ float VectorCost = expectedCost(i) / (float)i;
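+ // Illustrative: if the scalar loop costs 8 per iteration and the VF=4
+ // body costs 20, the per-scalar-iteration cost is 20/4 = 5, so VF=4 wins.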
+ DEBUG(dbgs() << "LV: Vector loop of width "<< i << " costs: " <<
+ (int)VectorCost << ".\n");
+ if (VectorCost < Cost) {
+ Cost = VectorCost;
+ Width = i;
+ }
+ }
+
+ DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
+ return Width;
+}
+
+unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
+ // We can only estimate the cost of single basic block loops.
+ assert(1 == TheLoop->getNumBlocks() && "Too many blocks in loop");
+
+ BasicBlock *BB = TheLoop->getHeader();
+ unsigned Cost = 0;
+
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ Instruction *Inst = it;
+ unsigned C = getInstructionCost(Inst, VF);
+ Cost += C;
+ DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF "<< VF <<
+ " For instruction: "<< *Inst << "\n");
+ }
+
+ return Cost;
+}
+
+unsigned
+LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
+ assert(VTTI && "Invalid vector target transformation info");
+
+ // If we know that this instruction will remain uniform, check the cost of
+ // the scalar version.
+ if (Legal->isUniformAfterVectorization(I))
+ VF = 1;
+
+ Type *RetTy = I->getType();
+ Type *VectorTy = ToVectorTy(RetTy, VF);
+
+
+ // TODO: We need to estimate the cost of intrinsic calls.
+ switch (I->getOpcode()) {
+ case Instruction::GetElementPtr:
+ // We mark this instruction as zero-cost because scalar GEPs are usually
+ // lowered into the instruction addressing mode. At the moment we don't
+ // generate vector GEPs.
+ return 0;
+ case Instruction::Br: {
+ return VTTI->getCFInstrCost(I->getOpcode());
+ }
+ case Instruction::PHI:
+ return 0;
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy);
+ }
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
+ bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
+ Type *CondTy = SI->getCondition()->getType();
+ if (ScalarCond)
+ CondTy = VectorType::get(CondTy, VF);
+
+ return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ Type *ValTy = I->getOperand(0)->getType();
+ VectorTy = ToVectorTy(ValTy, VF);
+ return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy);
+ }
+ case Instruction::Store: {
+ StoreInst *SI = cast<StoreInst>(I);
+ Type *ValTy = SI->getValueOperand()->getType();
+ VectorTy = ToVectorTy(ValTy, VF);
+
+ if (VF == 1)
+ return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
+ SI->getAlignment(), SI->getPointerAddressSpace());
+
+ // Scalarized stores.
+ if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
+ unsigned Cost = 0;
+ unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
+ ValTy);
+ // The cost of extracting from the value vector.
+ Cost += VF * (ExtCost);
+ // The cost of the scalar stores.
+ Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
+ ValTy->getScalarType(),
+ SI->getAlignment(),
+ SI->getPointerAddressSpace());
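+ // Illustrative: with VF = 4 this models four extractelement operations
+ // plus four scalar stores.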
+ return Cost;
+ }
+
+ // Wide stores.
+ return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(),
+ SI->getPointerAddressSpace());
+ }
+ case Instruction::Load: {
+ LoadInst *LI = cast<LoadInst>(I);
+
+ if (VF == 1)
+ return VTTI->getMemoryOpCost(I->getOpcode(), RetTy,
+ LI->getAlignment(),
+ LI->getPointerAddressSpace());
+
+ // Scalarized loads.
+ if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
+ unsigned Cost = 0;
+ unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
+ // The cost of inserting the loaded value into the result vector.
+ Cost += VF * (InCost);
+ // The cost of the scalar loads.
+ Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
+ RetTy->getScalarType(),
+ LI->getAlignment(),
+ LI->getPointerAddressSpace());
+ return Cost;
+ }
+
+ // Wide loads.
+ return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(),
+ LI->getPointerAddressSpace());
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
+ return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
+ }
+ default: {
+ // We are scalarizing the instruction. Return the cost of the scalar
+ // instruction, plus the cost of inserting the results into and extracting
+ // the operands from vector elements, times the vector width.
+ unsigned Cost = 0;
+
+ bool IsVoid = RetTy->isVoidTy();
+
+ unsigned InsCost = (IsVoid ? 0 :
+ VTTI->getInstrCost(Instruction::InsertElement,
+ VectorTy));
+
+ unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
+ VectorTy);
+
+ // The cost of inserting the results plus extracting each one of the
+ // operands.
+ Cost += VF * (InsCost + ExtCost * I->getNumOperands());
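+ // Illustrative: for a two-operand instruction at VF = 4 this adds four
+ // inserts and eight extracts on top of the four scalar operations below.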
+
+ // The cost of executing VF copies of the scalar instruction.
+ Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy);
+ return Cost;
+ }
+ }// end of switch.
+}
+
+Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
+ if (Scalar->isVoidTy() || VF == 1)
+ return Scalar;
+ return VectorType::get(Scalar, VF);
+}
+
+} // namespace
+
+char LoopVectorize::ID = 0;
+static const char lv_name[] = "Loop Vectorization";
+INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
+
+namespace llvm {
+ Pass *createLoopVectorizePass() {
+ return new LoopVectorize();
+ }
+}
+
diff --git a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
index 1ef6002..d26973a 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements common infrastructure for libLLVMVectorizeOpts.a, which
+// This file implements common infrastructure for libLLVMVectorizeOpts.a, which
// implements several vectorization transformations over the LLVM intermediate
// representation, including the C bindings for that library.
//
@@ -23,10 +23,11 @@
using namespace llvm;
-/// initializeVectorizationPasses - Initialize all passes linked into the
+/// initializeVectorizationPasses - Initialize all passes linked into the
/// Vectorization library.
void llvm::initializeVectorization(PassRegistry &Registry) {
initializeBBVectorizePass(Registry);
+ initializeLoopVectorizePass(Registry);
}
void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
@@ -37,3 +38,6 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createBBVectorizePass());
}
+void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopVectorizePass());
+}