Diffstat (limited to 'contrib/llvm/lib/Transforms')
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 117
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp | 86
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp | 76
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp | 9
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp | 30
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 141
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp | 830
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/IPO.cpp | 38
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp | 11
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp | 15
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/Inliner.cpp | 56
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/Internalize.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp | 17
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp | 646
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp | 8
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp | 216
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PruneEH.cpp | 11
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp | 24
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp | 11
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombine.h | 28
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 350
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 597
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 288
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 35
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 772
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 11
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 315
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 79
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 294
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp | 116
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 100
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 272
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 604
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp | 9
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp | 32
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp | 17
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp | 1423
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp | 22
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h | 7
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ADCE.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp | 11
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp | 369
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 86
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DCE.cpp | 12
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 847
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 470
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GVN.cpp | 813
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 49
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp | 998
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LICM.cpp | 324
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp | 26
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 594
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp | 1270
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp | 170
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp | 491
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 200
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 57
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 60
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp | 146
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 729
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp | 38
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp | 12
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SCCP.cpp | 39
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalar.cpp | 55
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp | 1155
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp | 9
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp | 342
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Sink.cpp | 20
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp | 18
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp | 139
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp | 28
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 157
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp | 56
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 54
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp | 40
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp | 45
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CloneModule.cpp | 8
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp | 173
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp | 9
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LCSSA.cpp | 12
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Local.cpp | 148
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp | 69
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp | 38
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp | 27
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp | 9
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp | 17
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 209
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp | 171
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2003
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp | 94
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Utils.cpp | 37
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp | 178
102 files changed, 12551 insertions, 8353 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 0c77e1f..0c650cf 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -39,7 +39,6 @@
#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
@@ -67,7 +66,9 @@ namespace {
virtual bool runOnSCC(CallGraphSCC &SCC);
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(ID), maxElements(maxElements) {}
+ : CallGraphSCCPass(ID), maxElements(maxElements) {
+ initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+ }
/// A vector used to hold the indices of a single GEP instruction
typedef std::vector<uint64_t> IndicesVector;
@@ -84,8 +85,12 @@ namespace {
}
char ArgPromotion::ID = 0;
-INITIALIZE_PASS(ArgPromotion, "argpromotion",
- "Promote 'by reference' arguments to scalars", false, false);
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
return new ArgPromotion(maxElements);
@@ -130,47 +135,74 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
if (PointerArgs.empty()) return 0;
// Second check: make sure that all callers are direct callers. We can't
- // transform functions that have indirect callers.
- if (F->hasAddressTaken())
- return 0;
-
+ // transform functions that have indirect callers. Also see if the function
+ // is self-recursive.
+ bool isSelfRecursive = false;
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
+ UI != E; ++UI) {
+ CallSite CS(*UI);
+ // Must be a direct call.
+ if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0;
+
+ if (CS.getInstruction()->getParent()->getParent() == F)
+ isSelfRecursive = true;
+ }
+
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
SmallPtrSet<Argument*, 8> ArgsToPromote;
SmallPtrSet<Argument*, 8> ByValArgsToTransform;
for (unsigned i = 0; i != PointerArgs.size(); ++i) {
bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);
+ Argument *PtrArg = PointerArgs[i].first;
+ const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
// If this is a byval argument, and if the aggregate type is small, just
// pass the elements, which is always safe.
- Argument *PtrArg = PointerArgs[i].first;
if (isByVal) {
- const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
if (maxElements > 0 && STy->getNumElements() > maxElements) {
DEBUG(dbgs() << "argpromotion disable promoting argument '"
<< PtrArg->getName() << "' because it would require adding more"
<< " than " << maxElements << " arguments to the function.\n");
- } else {
- // If all the elements are single-value types, we can promote it.
- bool AllSimple = true;
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- if (!STy->getElementType(i)->isSingleValueType()) {
- AllSimple = false;
- break;
- }
-
- // Safe to transform, don't even bother trying to "promote" it.
- // Passing the elements as a scalar will allow scalarrepl to hack on
- // the new alloca we introduce.
- if (AllSimple) {
- ByValArgsToTransform.insert(PtrArg);
- continue;
+ continue;
+ }
+
+ // If all the elements are single-value types, we can promote it.
+ bool AllSimple = true;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ if (!STy->getElementType(i)->isSingleValueType()) {
+ AllSimple = false;
+ break;
}
}
+
+ // Safe to transform, don't even bother trying to "promote" it.
+ // Passing the elements as a scalar will allow scalarrepl to hack on
+ // the new alloca we introduce.
+ if (AllSimple) {
+ ByValArgsToTransform.insert(PtrArg);
+ continue;
+ }
}
}
+ // If the argument is a recursive type and we're in a recursive
+ // function, we could end up infinitely peeling the function argument.
+ if (isSelfRecursive) {
+ if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
+ bool RecursiveType = false;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ if (STy->getElementType(i) == PtrArg->getType()) {
+ RecursiveType = true;
+ break;
+ }
+ }
+ if (RecursiveType)
+ continue;
+ }
+ }
+
// Otherwise, see if we can promote the pointer to its value.
if (isSafeToPromoteArgument(PtrArg, isByVal))
ArgsToPromote.insert(PtrArg);
@@ -183,22 +215,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
-/// IsAlwaysValidPointer - Return true if the specified pointer is always legal
-/// to load.
-static bool IsAlwaysValidPointer(Value *V) {
- if (isa<AllocaInst>(V) || isa<GlobalVariable>(V)) return true;
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V))
- return IsAlwaysValidPointer(GEP->getOperand(0));
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::GetElementPtr)
- return IsAlwaysValidPointer(CE->getOperand(0));
-
- return false;
-}
-
-/// AllCalleesPassInValidPointerForArgument - Return true if we can prove that
+/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
/// all callees pass in a valid pointer for the specified function argument.
-static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) {
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
Function *Callee = Arg->getParent();
unsigned ArgNo = std::distance(Callee->arg_begin(),
@@ -211,7 +230,7 @@ static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) {
CallSite CS(*UI);
assert(CS && "Should only have direct calls!");
- if (!IsAlwaysValidPointer(CS.getArgument(ArgNo)))
+ if (!CS.getArgument(ArgNo)->isDereferenceablePointer())
return false;
}
return true;
@@ -318,7 +337,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByVal || AllCalleesPassInValidPointerForArgument(Arg))
+ if (isByVal || AllCallersPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
@@ -434,8 +453,6 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
SmallPtrSet<BasicBlock*, 16> TranspBlocks;
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- TargetData *TD = getAnalysisIfAvailable<TargetData>();
- if (!TD) return false; // Without TargetData, assume the worst.
for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
// Check to see if the load is invalidated from the start of the block to
@@ -443,11 +460,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
LoadInst *Load = Loads[i];
BasicBlock *BB = Load->getParent();
- const PointerType *LoadTy =
- cast<PointerType>(Load->getPointerOperand()->getType());
- unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType());
-
- if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize))
+ AliasAnalysis::Location Loc = AA.getLocation(Load);
+ if (AA.canInstructionRangeModify(BB->front(), *Load, Loc))
return false; // Pointer is invalidated!
// Now check every path from the entry block to the load for transparency.
@@ -458,7 +472,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
I = idf_ext_begin(P, TranspBlocks),
E = idf_ext_end(P, TranspBlocks); I != E; ++I)
- if (AA.canBasicBlockModify(**I, Arg, LoadSize))
+ if (AA.canBasicBlockModify(**I, Loc))
return false;
}
}
@@ -694,6 +708,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// of the previous load.
LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
newLoad->setAlignment(OrigLoad->getAlignment());
+ // Transfer the TBAA info too.
+ newLoad->setMetadata(LLVMContext::MD_tbaa,
+ OrigLoad->getMetadata(LLVMContext::MD_tbaa));
Args.push_back(newLoad);
AA.copyValue(OrigLoad, Args.back());
}
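
The new isSelfRecursive guard above exists because promoting a recursively typed pointer argument of a self-recursive function never terminates: each promotion introduces another argument of the same pointer type. A minimal source-level sketch of the hazard (hypothetical example, not code from this patch):

struct list { int data; list *next; };

static int sum(list *n) {        // internal linkage, address never taken
  if (!n) return 0;
  // Promoting 'n' would pass 'n->data' and 'n->next' as scalars, but
  // 'n->next' is again a 'list *' feeding a self-recursive call, so it
  // would be promotable too -- peeling the type without bound.
  return n->data + sum(n->next);
}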
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 64e8d79..a21efce 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -33,7 +33,9 @@ STATISTIC(NumMerged, "Number of global constants merged");
namespace {
struct ConstantMerge : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- ConstantMerge() : ModulePass(ID) {}
+ ConstantMerge() : ModulePass(ID) {
+ initializeConstantMergePass(*PassRegistry::getPassRegistry());
+ }
// run - For this pass, process all of the globals in the module,
// eliminating duplicate constants.
@@ -44,7 +46,7 @@ namespace {
char ConstantMerge::ID = 0;
INITIALIZE_PASS(ConstantMerge, "constmerge",
- "Merge Duplicate Global Constants", false, false);
+ "Merge Duplicate Global Constants", false, false)
ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
@@ -63,6 +65,18 @@ static void FindUsedValues(GlobalVariable *LLVMUsed,
UsedValues.insert(GV);
}
+// True if A is better than B.
+static bool IsBetterCannonical(const GlobalVariable &A,
+ const GlobalVariable &B) {
+ if (!A.hasLocalLinkage() && B.hasLocalLinkage())
+ return true;
+
+ if (A.hasLocalLinkage() && !B.hasLocalLinkage())
+ return false;
+
+ return A.hasUnnamedAddr();
+}
+
bool ConstantMerge::runOnModule(Module &M) {
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
@@ -83,44 +97,76 @@ bool ConstantMerge::runOnModule(Module &M) {
// second level constants have initializers which point to the globals that
// were just merged.
while (1) {
- // First pass: identify all globals that can be merged together, filling in
- // the Replacements vector. We cannot do the replacement in this pass
- // because doing so may cause initializers of other globals to be rewritten,
- // invalidating the Constant* pointers in CMap.
- //
+
+ // First: Find the canonical constants others will be merged with.
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
GlobalVariable *GV = GVI++;
-
+
// If this GV is dead, remove it.
GV->removeDeadConstantUsers();
if (GV->use_empty() && GV->hasLocalLinkage()) {
GV->eraseFromParent();
continue;
}
-
- // Only process constants with initializers in the default addres space.
- if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() ||
- GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty() ||
+
+ // Only process constants with initializers in the default address space.
+ if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
// Don't touch values marked with attribute(used).
UsedGlobals.count(GV))
continue;
-
-
-
+
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
GlobalVariable *&Slot = CMap[Init];
- if (Slot == 0) { // Nope, add it to the map.
+ // If this is the first constant we find or if the old one is local,
+ // replace with the current one. If the current is externally visible
+ // it cannot be replaced, but it can be the canonical constant we merge with.
+ if (Slot == 0 || IsBetterCannonical(*GV, *Slot)) {
Slot = GV;
- } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate!
- // Make all uses of the duplicate constant use the canonical version.
- Replacements.push_back(std::make_pair(GV, Slot));
}
}
+ // Second: identify all globals that can be merged together, filling in
+ // the Replacements vector. We cannot do the replacement in this pass
+ // because doing so may cause initializers of other globals to be rewritten,
+ // invalidating the Constant* pointers in CMap.
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+
+ // Only process constants with initializers in the default address space.
+ if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV))
+ continue;
+
+ // We can only replace constants with local linkage.
+ if (!GV->hasLocalLinkage())
+ continue;
+
+ Constant *Init = GV->getInitializer();
+
+ // Check to see if the initializer is already known.
+ GlobalVariable *Slot = CMap[Init];
+
+ if (!Slot || Slot == GV)
+ continue;
+
+ if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr())
+ continue;
+
+ if (!GV->hasUnnamedAddr())
+ Slot->setUnnamedAddr(false);
+
+ // Make all uses of the duplicate constant use the canonical version.
+ Replacements.push_back(std::make_pair(GV, Slot));
+ }
+
if (Replacements.empty())
return MadeChange;
CMap.clear();
@@ -133,6 +179,8 @@ bool ConstantMerge::runOnModule(Module &M) {
Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
// Delete the global value from the module.
+ assert(Replacements[i].first->hasLocalLinkage() &&
+ "Refusing to delete an externally visible global variable.");
Replacements[i].first->eraseFromParent();
}
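
Taken together, the two passes above implement a simple eligibility rule: only a local-linkage duplicate may be folded away, and the fold is legal only when address identity is unobservable on at least one side. A condensed sketch of that rule, with assumed stand-in types rather than the real GlobalVariable API:

struct GVInfo { bool HasLocalLinkage; bool HasUnnamedAddr; };

// Mirrors the checks in the second pass: the duplicate must be local
// (externally visible globals cannot be deleted), and merging is only
// sound if the duplicate or the canonical global is unnamed_addr.
static bool canMergeInto(const GVInfo &Dup, const GVInfo &Canonical) {
  if (!Dup.HasLocalLinkage)
    return false;
  return Dup.HasUnnamedAddr || Canonical.HasUnnamedAddr;
}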
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 47df235..b423221 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -39,7 +39,8 @@ using namespace llvm;
STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
-
+STATISTIC(NumArgumentsReplacedWithUndef,
+ "Number of unread args replaced with undef");
namespace {
/// DAE - The dead argument elimination pass.
///
@@ -126,7 +127,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- DAE() : ModulePass(ID) {}
+ DAE() : ModulePass(ID) {
+ initializeDAEPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M);
@@ -146,12 +149,13 @@ namespace {
void PropagateLiveness(const RetOrArg &RA);
bool RemoveDeadStuffFromFunction(Function *F);
bool DeleteDeadVarargs(Function &Fn);
+ bool RemoveDeadArgumentsFromCallers(Function &Fn);
};
}
char DAE::ID = 0;
-INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false);
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false)
namespace {
/// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
@@ -168,7 +172,7 @@ namespace {
char DAH::ID = 0;
INITIALIZE_PASS(DAH, "deadarghaX0r",
"Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
- false, false);
+ false, false)
/// createDeadArgEliminationPass - This pass removes arguments from functions
/// which are not used by the body of the function.
@@ -285,6 +289,55 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
return true;
}
+/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
+/// arguments that are unused, and changes the caller parameters to be undefined
+/// instead.
+bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
+{
+ if (Fn.isDeclaration())
+ return false;
+
+ // Functions with local linkage should already have been handled.
+ if (Fn.hasLocalLinkage())
+ return false;
+
+ if (Fn.use_empty())
+ return false;
+
+ llvm::SmallVector<unsigned, 8> UnusedArgs;
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
+ I != E; ++I) {
+ Argument *Arg = I;
+
+ if (Arg->use_empty() && !Arg->hasByValAttr())
+ UnusedArgs.push_back(Arg->getArgNo());
+ }
+
+ if (UnusedArgs.empty())
+ return false;
+
+ bool Changed = false;
+
+ for (Function::use_iterator I = Fn.use_begin(), E = Fn.use_end();
+ I != E; ++I) {
+ CallSite CS(*I);
+ if (!CS || !CS.isCallee(I))
+ continue;
+
+ // Now go through all unused args and replace them with "undef".
+ for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) {
+ unsigned ArgNo = UnusedArgs[I];
+
+ Value *Arg = CS.getArgument(ArgNo);
+ CS.setArgument(ArgNo, UndefValue::get(Arg->getType()));
+ ++NumArgumentsReplacedWithUndef;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
/// Convenience function that returns the number of return values. It returns 0
/// for void functions and 1 for functions not returning a struct. It returns
/// the number of struct elements for functions returning a struct.
@@ -791,7 +844,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
} else if (New->getType()->isVoidTy()) {
// Our return value has uses, but they will get removed later on.
// Replace by null for now.
- Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
+ if (!Call->getType()->isX86_MMXTy())
+ Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
} else {
assert(RetTy->isStructTy() &&
"Return type changed, but not into a void. The old return type"
@@ -854,7 +908,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
} else {
// If this argument is dead, replace any uses of it with null constants
// (these are guaranteed to become unused later on).
- I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+ if (!I->getType()->isX86_MMXTy())
+ I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
}
// If we change the return value of the function we must rewrite any return
@@ -935,5 +990,14 @@ bool DAE::runOnModule(Module &M) {
Function *F = I++;
Changed |= RemoveDeadStuffFromFunction(F);
}
+
+ // Finally, look for any unused parameters in functions with non-local
+ // linkage and replace the passed in parameters with undef.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function& F = *I;
+
+ Changed |= RemoveDeadArgumentsFromCallers(F);
+ }
+
return Changed;
}
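
RemoveDeadArgumentsFromCallers cannot change the signature of an externally visible function, so it only rewrites what callers pass. A hypothetical source-level illustration of the effect (not code from this patch):

int f(int used, int unused) {   // external linkage: the signature stays
  return used * 2;              // 'unused' has no uses in the body
}

int caller(int x) {
  return f(x, x + 1);           // the pass turns this into f(x, undef)
}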
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp
index 5dc50c5..a509931 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp
@@ -26,7 +26,9 @@ STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
namespace {
struct DTE : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- DTE() : ModulePass(ID) {}
+ DTE() : ModulePass(ID) {
+ initializeDTEPass(*PassRegistry::getPassRegistry());
+ }
// doPassInitialization - For this pass, it removes global symbol table
// entries for primitive types. These are never used for linking in GCC and
@@ -45,7 +47,10 @@ namespace {
}
char DTE::ID = 0;
-INITIALIZE_PASS(DTE, "deadtypeelim", "Dead Type Elimination", false, false);
+INITIALIZE_PASS_BEGIN(DTE, "deadtypeelim", "Dead Type Elimination",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(FindUsedTypes)
+INITIALIZE_PASS_END(DTE, "deadtypeelim", "Dead Type Elimination", false, false)
ModulePass *llvm::createDeadTypeEliminationPass() {
return new DTE();
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
index 45c5fe7..9d432de 100644
--- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -50,24 +50,22 @@ namespace {
// Visit the GlobalVariables.
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (!I->isDeclaration()) {
- if (I->hasLocalLinkage())
- I->setVisibility(GlobalValue::HiddenVisibility);
- I->setLinkage(GlobalValue::ExternalLinkage);
- if (deleteStuff == Named.count(I))
- I->setInitializer(0);
- }
+ I != E; ++I) {
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration())
+ I->setInitializer(0);
+ }
// Visit the Functions.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration()) {
- if (I->hasLocalLinkage())
- I->setVisibility(GlobalValue::HiddenVisibility);
- I->setLinkage(GlobalValue::ExternalLinkage);
- if (deleteStuff == Named.count(I))
- I->deleteBody();
- }
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration())
+ I->deleteBody();
+ }
return true;
}
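
The deleteStuff == (bool)Named.count(I) test selects symmetrically for both modes: in delete mode the named values are stripped, in extract mode everything not named is. A small sketch of just that predicate, using std::set in place of the pass's member set (assumed simplification):

#include <set>
#include <string>

static bool shouldStrip(bool deleteStuff, const std::set<std::string> &Named,
                        const std::string &Name) {
  // count() yields 0 or 1; the cast makes the comparison against the
  // bool flag explicit, matching the fix in the hunk above.
  return deleteStuff == (bool)Named.count(Name);
}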
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 6165ba0..95decec 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -23,10 +23,10 @@
#include "llvm/CallGraphSCCPass.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
@@ -41,7 +41,9 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias");
namespace {
struct FunctionAttrs : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- FunctionAttrs() : CallGraphSCCPass(ID) {}
+ FunctionAttrs() : CallGraphSCCPass(ID), AA(0) {
+ initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ }
// runOnSCC - Analyze the SCC, performing the transformation if possible.
bool runOnSCC(CallGraphSCC &SCC);
@@ -61,67 +63,25 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
- bool PointsToLocalMemory(Value *V);
+ private:
+ AliasAnalysis *AA;
};
}
char FunctionAttrs::ID = 0;
-INITIALIZE_PASS(FunctionAttrs, "functionattrs",
- "Deduce function attributes", false, false);
+INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false)
Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
-/// PointsToLocalMemory - Returns whether the given pointer value points to
-/// memory that is local to the function. Global constants are considered
-/// local to all functions.
-bool FunctionAttrs::PointsToLocalMemory(Value *V) {
- SmallVector<Value*, 16> Worklist;
- unsigned MaxLookup = 8;
-
- Worklist.push_back(V);
-
- do {
- V = Worklist.pop_back_val()->getUnderlyingObject();
-
- // An alloca instruction defines local memory.
- if (isa<AllocaInst>(V))
- continue;
-
- // A global constant counts as local memory for our purposes.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
- if (!GV->isConstant())
- return false;
- continue;
- }
-
- // If both select values point to local memory, then so does the select.
- if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
- Worklist.push_back(SI->getTrueValue());
- Worklist.push_back(SI->getFalseValue());
- continue;
- }
-
- // If all values incoming to a phi node point to local memory, then so does
- // the phi.
- if (PHINode *PN = dyn_cast<PHINode>(V)) {
- // Don't bother inspecting phi nodes with many operands.
- if (PN->getNumIncomingValues() > MaxLookup)
- return false;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- Worklist.push_back(PN->getIncomingValue(i));
- continue;
- }
-
- return false;
- } while (!Worklist.empty() && --MaxLookup);
-
- return Worklist.empty();
-}
-
/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
SmallPtrSet<Function*, 8> SCCNodes;
@@ -141,14 +101,15 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
// External node - may write memory. Just give up.
return false;
- if (F->doesNotAccessMemory())
+ AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F);
+ if (MRB == AliasAnalysis::DoesNotAccessMemory)
// Already perfect!
continue;
// Definitions with weak linkage may be overridden at linktime with
// something that writes memory, so treat them like declarations.
if (F->isDeclaration() || F->mayBeOverridden()) {
- if (!F->onlyReadsMemory())
+ if (!AliasAnalysis::onlyReadsMemory(MRB))
// May write memory. Just give up.
return false;
@@ -163,32 +124,62 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
CallSite CS(cast<Value>(I));
- if (CS && CS.getCalledFunction()) {
+ if (CS) {
// Ignore calls to functions in the same SCC.
- if (SCCNodes.count(CS.getCalledFunction()))
+ if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
continue;
- // Ignore intrinsics that only access local memory.
- if (unsigned id = CS.getCalledFunction()->getIntrinsicID())
- if (AliasAnalysis::getIntrinsicModRefBehavior(id) ==
- AliasAnalysis::AccessesArguments) {
- // Check that all pointer arguments point to local memory.
+ AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS);
+ // If the call doesn't access arbitrary memory, we may be able to
+ // figure out something.
+ if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ // If the call does access argument pointees, check each argument.
+ if (AliasAnalysis::doesAccessArgPointees(MRB))
+ // Check whether all pointer arguments point to local memory, and
+ // ignore calls that only access local memory.
for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
CI != CE; ++CI) {
Value *Arg = *CI;
- if (Arg->getType()->isPointerTy() && !PointsToLocalMemory(Arg))
- // Writes memory. Just give up.
- return false;
+ if (Arg->getType()->isPointerTy()) {
+ AliasAnalysis::Location Loc(Arg,
+ AliasAnalysis::UnknownSize,
+ I->getMetadata(LLVMContext::MD_tbaa));
+ if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) {
+ if (MRB & AliasAnalysis::Mod)
+ // Writes non-local memory. Give up.
+ return false;
+ if (MRB & AliasAnalysis::Ref)
+ // Ok, it reads non-local memory.
+ ReadsMemory = true;
+ }
+ }
}
- // Only reads and writes local memory.
- continue;
- }
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- // Ignore loads from local memory.
- if (PointsToLocalMemory(LI->getPointerOperand()))
continue;
+ }
+ // The call could access any memory. If that includes writes, give up.
+ if (MRB & AliasAnalysis::Mod)
+ return false;
+ // If it reads, note it.
+ if (MRB & AliasAnalysis::Ref)
+ ReadsMemory = true;
+ continue;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ // Ignore non-volatile loads from local memory.
+ if (!LI->isVolatile()) {
+ AliasAnalysis::Location Loc = AA->getLocation(LI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- // Ignore stores to local memory.
- if (PointsToLocalMemory(SI->getPointerOperand()))
+ // Ignore non-volatile stores to local memory.
+ if (!SI->isVolatile()) {
+ AliasAnalysis::Location Loc = AA->getLocation(SI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+ // Ignore vaargs on local memory.
+ AliasAnalysis::Location Loc = AA->getLocation(VI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
@@ -198,10 +189,6 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
// Writes memory. Just give up.
return false;
- if (isMalloc(I))
- // malloc claims not to write memory! PR3754.
- return false;
-
// If this instruction may read memory, remember that.
ReadsMemory |= I->mayReadFromMemory();
}
@@ -384,6 +371,8 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
}
bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+ AA = &getAnalysis<AliasAnalysis>();
+
bool Changed = AddReadAttrs(SCC);
Changed |= AddNoCaptureAttrs(SCC);
Changed |= AddNoAliasAttrs(SCC);
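
The rewritten AddReadAttrs loop replaces the hand-rolled PointsToLocalMemory walk with AliasAnalysis queries, and the per-call decision reduces to a small lattice: local-only accesses are ignored, non-local writes defeat the deduction, and non-local reads demote readnone to readonly. A condensed sketch of that decision with assumed stand-in flags (the real ModRefBehavior machinery lives in AliasAnalysis):

enum ModRefBits { Ref = 1, Mod = 2 };

// Returns false when the attribute deduction must give up entirely
// (a possible write to non-local memory); otherwise records whether
// the function can still be readnone or merely readonly.
static bool accountForCall(unsigned MRB, bool OnlyTouchesLocalMemory,
                           bool &ReadsMemory) {
  if (OnlyTouchesLocalMemory)
    return true;                 // provably local: ignore the call
  if (MRB & Mod)
    return false;                // may write non-local memory: give up
  if (MRB & Ref)
    ReadsMemory = true;          // reads demote readnone to readonly
  return true;
}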
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index aa18601..2b427aa 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -31,7 +31,9 @@ STATISTIC(NumVariables, "Number of global variables removed");
namespace {
struct GlobalDCE : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- GlobalDCE() : ModulePass(ID) {}
+ GlobalDCE() : ModulePass(ID) {
+ initializeGlobalDCEPass(*PassRegistry::getPassRegistry());
+ }
// run - Do the GlobalDCE pass on the specified module, optionally updating
// the specified callgraph to reflect the changes.
@@ -52,7 +54,7 @@ namespace {
char GlobalDCE::ID = 0;
INITIALIZE_PASS(GlobalDCE, "globaldce",
- "Dead Global Elimination", false, false);
+ "Dead Global Elimination", false, false)
ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
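
Nearly every pass in this change gains the same constructor call to initialize*Pass(*PassRegistry::getPassRegistry()). A toy model of the idiom (hypothetical, not LLVM's actual registry API) showing why it belongs in the constructor: a pass created directly with new, bypassing the command-line machinery, still registers itself on first construction:

#include <map>
#include <string>

struct PassRegistry {
  static PassRegistry *getPassRegistry() {
    static PassRegistry R;            // one process-wide registry
    return &R;
  }
  std::map<std::string, std::string> Passes;
};

static void initializeGlobalDCEPass(PassRegistry &R) {
  R.Passes["globaldce"] = "Dead Global Elimination";
}

struct GlobalDCE {
  GlobalDCE() {
    // Eager registration: safe to call repeatedly, and it runs even
    // when the pass is constructed directly rather than by name.
    initializeGlobalDCEPass(*PassRegistry::getPassRegistry());
  }
};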
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index a77af54..d4cb712 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -40,6 +40,7 @@
using namespace llvm;
STATISTIC(NumMarked , "Number of globals marked constant");
+STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
@@ -55,11 +56,14 @@ STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
namespace {
+ struct GlobalStatus;
struct GlobalOpt : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
}
static char ID; // Pass identification, replacement for typeid
- GlobalOpt() : ModulePass(ID) {}
+ GlobalOpt() : ModulePass(ID) {
+ initializeGlobalOptPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M);
@@ -69,13 +73,16 @@ namespace {
bool OptimizeGlobalVars(Module &M);
bool OptimizeGlobalAliases(Module &M);
bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
- bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
+ bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
+ bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
+ const SmallPtrSet<const PHINode*, 16> &PHIUsers,
+ const GlobalStatus &GS);
};
}
char GlobalOpt::ID = 0;
INITIALIZE_PASS(GlobalOpt, "globalopt",
- "Global Variable Optimizer", false, false);
+ "Global Variable Optimizer", false, false)
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
@@ -85,6 +92,9 @@ namespace {
/// about it. If we find out that the address of the global is taken, none of
/// this info will be accurate.
struct GlobalStatus {
+ /// isCompared - True if the global's address is used in a comparison.
+ bool isCompared;
+
/// isLoaded - True if the global is ever loaded. If the global isn't ever
/// loaded it can be deleted.
bool isLoaded;
@@ -129,10 +139,11 @@ struct GlobalStatus {
/// HasPHIUser - Set to true if this global has a user that is a PHI node.
bool HasPHIUser;
-
- GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0),
- AccessingFunction(0), HasMultipleAccessingFunctions(false),
- HasNonInstructionUser(false), HasPHIUser(false) {}
+
+ GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
+ StoredOnceValue(0), AccessingFunction(0),
+ HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
+ HasPHIUser(false) {}
};
}
@@ -165,6 +176,11 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
const User *U = *UI;
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
GS.HasNonInstructionUser = true;
+
+ // If the result of the constantexpr isn't a pointer type, then we won't
+ // know to expect it in various places. Just reject early.
+ if (!isa<PointerType>(CE->getType())) return true;
+
if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
} else if (const Instruction *I = dyn_cast<Instruction>(U)) {
if (!GS.HasMultipleAccessingFunctions) {
@@ -221,7 +237,7 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
GS.HasPHIUser = true;
} else if (isa<CmpInst>(I)) {
- // Nothing to analyse.
+ GS.isCompared = true;
} else if (isa<MemTransferInst>(I)) {
const MemTransferInst *MTI = cast<MemTransferInst>(I);
if (MTI->getArgOperand(0) == V)
@@ -308,7 +324,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
Changed |= CleanupConstantGlobalUsers(CE, SubInit);
- } else if (CE->getOpcode() == Instruction::BitCast &&
+ } else if (CE->getOpcode() == Instruction::BitCast &&
CE->getType()->isPointerTy()) {
// Pointer cast, delete any stores and memsets to the global.
Changed |= CleanupConstantGlobalUsers(CE, 0);
@@ -324,7 +340,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
// and will invalidate our notion of what Init is.
Constant *SubInit = 0;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
- ConstantExpr *CE =
+ ConstantExpr *CE =
dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
@@ -361,7 +377,7 @@ static bool isSafeSROAElementUse(Value *V) {
// We might have a dead and dangling constant hanging off of here.
if (Constant *C = dyn_cast<Constant>(V))
return SafeToDestroyConstant(C);
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
@@ -371,15 +387,15 @@ static bool isSafeSROAElementUse(Value *V) {
// Stores *to* the pointer are ok.
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getOperand(0) != V;
-
+
// Otherwise, it must be a GEP.
GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
if (GEPI == 0) return false;
-
+
if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
!cast<Constant>(GEPI->getOperand(1))->isNullValue())
return false;
-
+
for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
I != E; ++I)
if (!isSafeSROAElementUse(*I))
@@ -393,11 +409,11 @@ static bool isSafeSROAElementUse(Value *V) {
///
static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
// The user of the global must be a GEP Inst or a ConstantExpr GEP.
- if (!isa<GetElementPtrInst>(U) &&
- (!isa<ConstantExpr>(U) ||
+ if (!isa<GetElementPtrInst>(U) &&
+ (!isa<ConstantExpr>(U) ||
cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
return false;
-
+
// Check to see if this ConstantExpr GEP is SRA'able. In particular, we
// don't like < 3 operand CE's, and we don't like non-constant integer
// indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
@@ -409,18 +425,18 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
++GEPI; // Skip over the pointer index.
-
+
// If this is a use of an array allocation, do a bit more checking for sanity.
if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
uint64_t NumElements = AT->getNumElements();
ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
-
+
// Check to make sure that index falls within the array. If not,
// something funny is going on, so we won't do the optimization.
//
if (Idx->getZExtValue() >= NumElements)
return false;
-
+
// We cannot scalar repl this level of the array unless any array
// sub-indices are in-range constants. In particular, consider:
// A[0][i]. We cannot know that the user isn't doing invalid things like
@@ -441,7 +457,7 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
"Indexed GEP type is not array, vector, or struct!");
continue;
}
-
+
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
return false;
@@ -465,7 +481,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
}
return true;
}
-
+
/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
/// variable. This opens the door for other optimizations by exposing the
@@ -476,7 +492,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
// Make sure this global only has simple uses that we can SRA.
if (!GlobalUsersSafeToSRA(GV))
return 0;
-
+
assert(GV->hasLocalLinkage() && !GV->isConstant());
Constant *Init = GV->getInitializer();
const Type *Ty = Init->getType();
@@ -488,7 +504,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
unsigned StartAlignment = GV->getAlignment();
if (StartAlignment == 0)
StartAlignment = TD.getABITypeAlignment(GV->getType());
-
+
if (const StructType *STy = dyn_cast<StructType>(Ty)) {
NewGlobals.reserve(STy->getNumElements());
const StructLayout &Layout = *TD.getStructLayout(STy);
@@ -503,7 +519,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
GV->getType()->getAddressSpace());
Globals.insert(GV, NGV);
NewGlobals.push_back(NGV);
-
+
// Calculate the known alignment of the field. If the original aggregate
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
@@ -522,7 +538,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
if (NumElements > 16 && GV->hasNUsesOrMore(16))
return 0; // It's not worth it.
NewGlobals.reserve(NumElements);
-
+
uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
for (unsigned i = 0, e = NumElements; i != e; ++i) {
@@ -537,7 +553,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
GV->getType()->getAddressSpace());
Globals.insert(GV, NGV);
NewGlobals.push_back(NGV);
-
+
// Calculate the known alignment of the field. If the original aggregate
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
@@ -549,7 +565,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
if (NewGlobals.empty())
return 0;
-
+
DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
@@ -615,7 +631,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
}
/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
-/// value will trap if the value is dynamically null. PHIs keeps track of any
+/// value will trap if the value is dynamically null. PHIs keeps track of any
/// phi nodes we've seen to avoid reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
SmallPtrSet<const PHINode*, 8> &PHIs) {
@@ -757,7 +773,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
// Keep track of whether we are able to remove all the uses of the global
// other than the store that defines it.
bool AllNonStoreUsesGone = true;
-
+
// Replace all uses of loads with uses of uses of the stored value.
for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
User *GlobalUser = *GUI++;
@@ -830,7 +846,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
ConstantInt *NElements,
TargetData* TD) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
-
+
const Type *GlobalType;
if (NElements->getZExtValue() == 1)
GlobalType = AllocTy;
@@ -840,14 +856,14 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// Create the new global variable. The contents of the malloc'd memory is
// undefined, so initialize with an undef value.
- GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
+ GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
GlobalType, false,
GlobalValue::InternalLinkage,
UndefValue::get(GlobalType),
GV->getName()+".body",
GV,
GV->isThreadLocal());
-
+
// If there are bitcast users of the malloc (which is typical, usually we have
// a malloc + bitcast) then replace them with uses of the new global. Update
// other users to use the global as well.
@@ -867,10 +883,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
User->replaceUsesOfWith(CI, TheBC);
}
}
-
+
Constant *RepValue = NewGV;
if (NewGV->getType() != GV->getType()->getElementType())
- RepValue = ConstantExpr::getBitCast(RepValue,
+ RepValue = ConstantExpr::getBitCast(RepValue,
GV->getType()->getElementType());
// If there is a comparison against null, we will insert a global bool to
@@ -890,7 +906,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
SI->eraseFromParent();
continue;
}
-
+
LoadInst *LI = cast<LoadInst>(GV->use_back());
while (!LI->use_empty()) {
Use &LoadUse = LI->use_begin().getUse();
@@ -898,7 +914,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
LoadUse = RepValue;
continue;
}
-
+
ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
// Replace the cmp X, 0 with a use of the bool value.
Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
@@ -963,20 +979,20 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
continue; // Fine, ignore.
}
-
+
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
return false; // Storing the pointer itself... bad.
continue; // Otherwise, storing through it, or storing into GV... fine.
}
-
+
// Must index into the array and into the struct.
if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
return false;
continue;
}
-
+
if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
// PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
// cycles.
@@ -985,13 +1001,13 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
return false;
continue;
}
-
+
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
return false;
continue;
}
-
+
return false;
}
return true;
@@ -1000,9 +1016,9 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
/// somewhere. Transform all uses of the allocation into loads from the
/// global and uses of the resultant pointer. Further, delete the store into
-/// GV. This assumes that these value pass the
+/// GV. This assumes that these values pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
-static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
+static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
GlobalVariable *GV) {
while (!Alloc->use_empty()) {
Instruction *U = cast<Instruction>(*Alloc->use_begin());
@@ -1035,7 +1051,7 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
continue;
}
}
-
+
// Insert a load from the global, and use it instead of the malloc.
Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
U->replaceUsesOfWith(Alloc, NL);
@@ -1053,24 +1069,24 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
++UI) {
const Instruction *User = cast<Instruction>(*UI);
-
+
// Comparison against null is ok.
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
return false;
continue;
}
-
+
// getelementptr is also ok, but only a simple form.
if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
// Must index into the array and into the struct.
if (GEPI->getNumOperands() < 3)
return false;
-
+
// Otherwise the GEP is ok.
continue;
}
-
+
if (const PHINode *PN = dyn_cast<PHINode>(User)) {
if (!LoadUsingPHIsPerLoad.insert(PN))
// This means some phi nodes are dependent on each other.
@@ -1079,19 +1095,19 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
if (!LoadUsingPHIs.insert(PN))
// If we have already analyzed this PHI, then it is safe.
continue;
-
+
// Make sure all uses of the PHI are simple enough to transform.
if (!LoadUsesSimpleEnoughForHeapSRA(PN,
LoadUsingPHIs, LoadUsingPHIsPerLoad))
return false;
-
+
continue;
}
-
+
// Otherwise we don't know what this is, not ok.
return false;
}
-
+
return true;
}
@@ -1110,10 +1126,10 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
return false;
LoadUsingPHIsPerLoad.clear();
}
-
+
// If we reach here, we know that all uses of the loads and transitive uses
// (through PHI nodes) are simple enough to transform. However, we don't know
- // that all inputs the to the PHI nodes are in the same equivalence sets.
+ // that all inputs to the PHI nodes are in the same equivalence sets.
// Check to verify that all operands of the PHIs are either PHIS that can be
// transformed, loads from GV, or MI itself.
for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
@@ -1121,29 +1137,29 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
const PHINode *PN = *I;
for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
Value *InVal = PN->getIncomingValue(op);
-
+
// PHI of the stored value itself is ok.
if (InVal == StoredVal) continue;
-
+
if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
// One of the PHIs in our set is (optimistically) ok.
if (LoadUsingPHIs.count(InPN))
continue;
return false;
}
-
+
// Load from GV is ok.
if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
if (LI->getOperand(0) == GV)
continue;
-
+
// UNDEF? NULL?
-
+
// Anything else is rejected.
return false;
}
}
-
+
return true;
}
@@ -1151,15 +1167,15 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
-
+
if (FieldNo >= FieldVals.size())
FieldVals.resize(FieldNo+1);
-
+
// If we already have this value, just reuse the previously scalarized
// version.
if (Value *FieldVal = FieldVals[FieldNo])
return FieldVal;
-
+
// Depending on what instruction this is, we have several cases.
Value *Result;
if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
@@ -1172,9 +1188,9 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
- const StructType *ST =
+ const StructType *ST =
cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
-
+
Result =
PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
PN->getName()+".f"+Twine(FieldNo), PN);
@@ -1183,13 +1199,13 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
llvm_unreachable("Unknown usable value");
Result = 0;
}
-
+
return FieldVals[FieldNo] = Result;
}
/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
/// the load, rewrite the derived value to use the HeapSRoA'd load.
-static void RewriteHeapSROALoadUser(Instruction *LoadUser,
+static void RewriteHeapSROALoadUser(Instruction *LoadUser,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
// If this is a comparison against null, handle it.
@@ -1199,30 +1215,30 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
// field.
Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
InsertedScalarizedValues, PHIsToRewrite);
-
+
Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
- Constant::getNullValue(NPtr->getType()),
+ Constant::getNullValue(NPtr->getType()),
SCI->getName());
SCI->replaceAllUsesWith(New);
SCI->eraseFromParent();
return;
}
-
+
// Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
&& "Unexpected GEPI!");
-
+
// Load the pointer for this field.
unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
InsertedScalarizedValues, PHIsToRewrite);
-
+
// Create the new GEP idx vector.
SmallVector<Value*, 8> GEPIdx;
GEPIdx.push_back(GEPI->getOperand(1));
GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
-
+
Value *NGEPI = GetElementPtrInst::Create(NewPtr,
GEPIdx.begin(), GEPIdx.end(),
GEPI->getName(), GEPI);
@@ -1243,7 +1259,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
tie(InsertPos, Inserted) =
InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
if (!Inserted) return;
-
+
// If this is the first time we've seen this PHI, recursively process all
// users.
for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
@@ -1256,7 +1272,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
/// is a value loaded from the global. Eliminate all uses of Ptr, making them
/// use FieldGlobals instead. All uses of loaded values satisfy
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
-static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
+static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
@@ -1264,7 +1280,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
Instruction *User = cast<Instruction>(*UI++);
RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
}
-
+
if (Load->use_empty()) {
Load->eraseFromParent();
InsertedScalarizedValues.erase(Load);
@@ -1289,11 +1305,11 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// new mallocs at the same place as CI, and N globals.
std::vector<Value*> FieldGlobals;
std::vector<Value*> FieldMallocs;
-
+
for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
const Type *FieldTy = STy->getElementType(FieldNo);
const PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
-
+
GlobalVariable *NGV =
new GlobalVariable(*GV->getParent(),
PFieldTy, false, GlobalValue::InternalLinkage,
@@ -1301,7 +1317,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
GV->getName() + ".f" + Twine(FieldNo), GV,
GV->isThreadLocal());
FieldGlobals.push_back(NGV);
-
+
unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
if (const StructType *ST = dyn_cast<StructType>(FieldTy))
TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
@@ -1313,7 +1329,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
FieldMallocs.push_back(NMI);
new StoreInst(NMI, NGV, CI);
}
-
+
// The tricky aspect of this transformation is handling the case when malloc
// fails. In the original code, malloc failing would set the result pointer
// of malloc to null. In this case, some mallocs could succeed and others
@@ -1340,23 +1356,23 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// Split the basic block at the old malloc.
BasicBlock *OrigBB = CI->getParent();
BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
-
+
// Create the block to check the first condition. Put all these blocks at the
// end of the function as they are unlikely to be executed.
BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
"malloc_ret_null",
OrigBB->getParent());
-
+
// Remove the uncond branch from OrigBB to ContBB, turning it into a cond
// branch on RunningOr.
OrigBB->getTerminator()->eraseFromParent();
BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
-
+
// Within the NullPtrBlock, we need to emit a comparison and branch for each
// pointer, because some may be null while others are not.
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
- Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
+ Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
Constant::getNullValue(GVVal->getType()),
"tmp");
BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
@@ -1371,10 +1387,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
FreeBlock);
BranchInst::Create(NextBlock, FreeBlock);
-
+
NullPtrBlock = NextBlock;
}
-
+
BranchInst::Create(ContBB, NullPtrBlock);
// CI is no longer needed, remove it.
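For orientation, the allocation-failure protocol this hunk builds can be mirrored in plain C++. A minimal sketch under invented names (GV_f0, GV_f1, allocFields are illustrative only; the pass itself emits the IR blocks malloc_ret_null/free_it/malloc_cont rather than source code):

    #include <cstdlib>

    // Stand-ins for the per-field globals (GV.f0, GV.f1) created above.
    static int    *GV_f0;
    static double *GV_f1;

    // One malloc per field, then the "RunningOr" check: if any field's
    // allocation failed, free the ones that succeeded and null them all,
    // so callers still observe a single all-or-nothing allocation.
    static bool allocFields(std::size_t N) {
      GV_f0 = static_cast<int *>(std::malloc(N * sizeof(int)));
      GV_f1 = static_cast<double *>(std::malloc(N * sizeof(double)));
      if (!GV_f0 || !GV_f1) {            // branch to "malloc_ret_null"
        if (GV_f0) std::free(GV_f0);     // the per-field "free_it" blocks
        if (GV_f1) std::free(GV_f1);
        GV_f0 = 0;                       // store null into each field global
        GV_f1 = 0;
        return false;
      }
      return true;                       // fall through to "malloc_cont"
    }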
@@ -1385,25 +1401,25 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
/// inserted for a given load.
DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
InsertedScalarizedValues[GV] = FieldGlobals;
-
+
std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
-
+
// Okay, the malloc site is completely handled. All of the uses of GV are now
// loads, and all uses of those loads are simple. Rewrite them to use loads
// of the per-field globals instead.
for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
Instruction *User = cast<Instruction>(*UI++);
-
+
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
continue;
}
-
+
// Must be a store of null.
StoreInst *SI = cast<StoreInst>(User);
assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
"Unexpected heap-sra user!");
-
+
// Insert a store of null into each global.
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
@@ -1430,7 +1446,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
}
}
-
+
// Drop all inter-phi links and any loads that made it this far.
for (DenseMap<Value*, std::vector<Value*> >::iterator
I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
@@ -1440,7 +1456,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
LI->dropAllReferences();
}
-
+
// Delete all the phis and loads now that inter-references are dead.
for (DenseMap<Value*, std::vector<Value*> >::iterator
I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
@@ -1450,7 +1466,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
LI->eraseFromParent();
}
-
+
// The old global is now dead, remove it.
GV->eraseFromParent();
@@ -1468,7 +1484,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
TargetData *TD) {
if (!TD)
return false;
-
+
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -1508,7 +1524,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
return true;
}
-
+
// If the allocation is an array of structures, consider transforming this
// into multiple malloc'd arrays, one for each field. This is basically
// SRoA for malloc'd memory.
@@ -1544,13 +1560,13 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CI = dyn_cast<BitCastInst>(Malloc) ?
extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
}
-
+
GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
return true;
}
-
+
return false;
-}
+}
// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
// that only one value (besides its initializer) is ever stored to the global.
@@ -1568,7 +1584,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
GV->getInitializer()->isNullValue()) {
if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
if (GV->getInitializer()->getType() != SOVC->getType())
- SOVC =
+ SOVC =
ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
// Optimize away any trapping uses of the loaded value.
@@ -1576,7 +1592,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
const Type* MallocType = getMallocAllocatedType(CI);
- if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
+ if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
GVI, TD))
return true;
}
@@ -1591,7 +1607,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
/// whenever it is used. This exposes the values to other scalar optimizations.
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
const Type *GVElType = GV->getType()->getElementType();
-
+
// If GVElType is already i1, it is already shrunk. If the type of the GV is
// an FP value, pointer or vector, don't do this optimization because a select
// between them is very expensive and unlikely to lead to later
@@ -1611,11 +1627,11 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
}
DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV);
-
+
// Create the new global, initializing it to false.
GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
false,
- GlobalValue::InternalLinkage,
+ GlobalValue::InternalLinkage,
ConstantInt::getFalse(GV->getContext()),
GV->getName()+".b",
GV->isThreadLocal());
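The net effect of the shrink-to-bool rewrite is easier to see at source level. A rough C++ analogue under invented names (Init, Other, GV_b); the real pass rewrites IR, turning loads into selects and stores into i1 stores:

    // GV only ever holds its initializer or one other constant, so one
    // bool suffices: false = initializer, true = the other value.
    static const int Init  = 0;      // old initializer
    static const int Other = 42;     // the single other stored value
    static bool GV_b = false;        // the new "<name>.b" global

    static void storeOther() { GV_b = true; }
    static int  loadGV()     { return GV_b ? Other : Init; }  // select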
@@ -1684,10 +1700,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
/// ProcessInternalGlobal - Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
-bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
- Module::global_iterator &GVI) {
- SmallPtrSet<const PHINode*, 16> PHIUsers;
- GlobalStatus GS;
+bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
+ Module::global_iterator &GVI) {
+ if (!GV->hasLocalLinkage())
+ return false;
+
+ // Do more involved optimizations if the global is internal.
GV->removeDeadConstantUsers();
if (GV->use_empty()) {
@@ -1697,140 +1715,139 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return true;
}
- if (!AnalyzeGlobal(GV, GS, PHIUsers)) {
-#if 0
- DEBUG(dbgs() << "Global: " << *GV);
- DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n");
- DEBUG(dbgs() << " StoredType = ");
- switch (GS.StoredType) {
- case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break;
- case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n");
- break;
- case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break;
- case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break;
- }
- if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue)
- DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n");
- if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions)
- DEBUG(dbgs() << " AccessingFunction = "
- << GS.AccessingFunction->getName() << "\n");
- DEBUG(dbgs() << " HasMultipleAccessingFunctions = "
- << GS.HasMultipleAccessingFunctions << "\n");
- DEBUG(dbgs() << " HasNonInstructionUser = "
- << GS.HasNonInstructionUser<<"\n");
- DEBUG(dbgs() << "\n");
-#endif
-
- // If this is a first class global and has only one accessing function
- // and this function is main (which we know is not recursive we can make
- // this global a local variable) we replace the global with a local alloca
- // in this function.
- //
- // NOTE: It doesn't make sense to promote non single-value types since we
- // are just replacing static memory to stack memory.
- //
- // If the global is in different address space, don't bring it to stack.
- if (!GS.HasMultipleAccessingFunctions &&
- GS.AccessingFunction && !GS.HasNonInstructionUser &&
- GV->getType()->getElementType()->isSingleValueType() &&
- GS.AccessingFunction->getName() == "main" &&
- GS.AccessingFunction->hasExternalLinkage() &&
- GV->getType()->getAddressSpace() == 0) {
- DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
- Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction
- ->getEntryBlock().begin());
- const Type* ElemTy = GV->getType()->getElementType();
- // FIXME: Pass Global's alignment when globals have alignment
- AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
- if (!isa<UndefValue>(GV->getInitializer()))
- new StoreInst(GV->getInitializer(), Alloca, &FirstI);
-
- GV->replaceAllUsesWith(Alloca);
+ SmallPtrSet<const PHINode*, 16> PHIUsers;
+ GlobalStatus GS;
+
+ if (AnalyzeGlobal(GV, GS, PHIUsers))
+ return false;
+
+ if (!GS.isCompared && !GV->hasUnnamedAddr()) {
+ GV->setUnnamedAddr(true);
+ NumUnnamed++;
+ }
+
+ if (GV->isConstant() || !GV->hasInitializer())
+ return false;
+
+ return ProcessInternalGlobal(GV, GVI, PHIUsers, GS);
+}
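A remark on the unnamed_addr marking just above: when AnalyzeGlobal saw no comparisons of the global's address, only its contents are observable, which is what later merging passes exploit. The effect is analogous to constant pooling in C++ (illustrative snippet, not from the patch; folding is legal only because no code tests the addresses for inequality):

    // With unnamed_addr semantics these two globals may share storage,
    // since no remaining use distinguishes them by address:
    static const char A[] = "hello";
    static const char B[] = "hello";   // may be overlapped with A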
+
+/// ProcessInternalGlobal - Analyze the specified global variable and optimize
+/// it if possible. If we make a change, return true.
+bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
+ Module::global_iterator &GVI,
+ const SmallPtrSet<const PHINode*, 16> &PHIUsers,
+ const GlobalStatus &GS) {
+  // If this is a first class global and has only one accessing function,
+  // and this function is main (which we know is not recursive), we can
+  // replace the global with a local alloca in this function.
+ //
+  // NOTE: It doesn't make sense to promote non-single-value types since we
+  // are just replacing static memory with stack memory.
+ //
+  // If the global is in a different address space, don't bring it to the
+  // stack.
+ if (!GS.HasMultipleAccessingFunctions &&
+ GS.AccessingFunction && !GS.HasNonInstructionUser &&
+ GV->getType()->getElementType()->isSingleValueType() &&
+ GS.AccessingFunction->getName() == "main" &&
+ GS.AccessingFunction->hasExternalLinkage() &&
+ GV->getType()->getAddressSpace() == 0) {
+ DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
+ Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction
+ ->getEntryBlock().begin());
+ const Type* ElemTy = GV->getType()->getElementType();
+ // FIXME: Pass Global's alignment when globals have alignment
+ AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
+ if (!isa<UndefValue>(GV->getInitializer()))
+ new StoreInst(GV->getInitializer(), Alloca, &FirstI);
+
+ GV->replaceAllUsesWith(Alloca);
+ GV->eraseFromParent();
+ ++NumLocalized;
+ return true;
+ }
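At source level, the localization branch above amounts to the following rewrite (hypothetical example; the pass works on IR, inserting an alloca and a store of the old initializer at the top of main's entry block):

    // Before: a non-constant global that only main ever touches.
    //   static int Counter = 5;
    //   int main() { Counter += 1; return Counter; }
    //
    // After: the global is demoted to a local in main.
    int main() {
      int Counter = 5;   // alloca + StoreInst of the old initializer
      Counter += 1;
      return Counter;
    }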
+
+ // If the global is never loaded (but may be stored to), it is dead.
+ // Delete it now.
+ if (!GS.isLoaded) {
+ DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
+
+ // Delete any stores we can find to the global. We may not be able to
+ // make it completely dead though.
+ bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ // If the global is dead now, delete it.
+ if (GV->use_empty()) {
GV->eraseFromParent();
- ++NumLocalized;
- return true;
+ ++NumDeleted;
+ Changed = true;
}
-
- // If the global is never loaded (but may be stored to), it is dead.
- // Delete it now.
- if (!GS.isLoaded) {
- DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
-
- // Delete any stores we can find to the global. We may not be able to
- // make it completely dead though.
- bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
-
- // If the global is dead now, delete it.
- if (GV->use_empty()) {
- GV->eraseFromParent();
- ++NumDeleted;
- Changed = true;
- }
- return Changed;
+ return Changed;
- } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
- DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
- GV->setConstant(true);
+ } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
+ DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
+ GV->setConstant(true);
- // Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer());
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
- // If the global is dead now, just nuke it.
- if (GV->use_empty()) {
- DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
- << "all users and delete global!\n");
- GV->eraseFromParent();
- ++NumDeleted;
+ // If the global is dead now, just nuke it.
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
+ << "all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+
+ ++NumMarked;
+ return true;
+ } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
+ if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
+ GVI = FirstNewGV; // Don't skip the newly produced globals!
+ return true;
+ }
+ } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
+ // If the initial value for the global was an undef value, and if only
+ // one other value was stored into it, we can just change the
+ // initializer to be the stored value, then delete all stores to the
+ // global. This allows us to mark it constant.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (isa<UndefValue>(GV->getInitializer())) {
+ // Change the initial value here.
+ GV->setInitializer(SOVConstant);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** Substituting initializer allowed us to "
+ << "simplify all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
+ } else {
+ GVI = GV;
+ }
+ ++NumSubstitute;
+ return true;
}
- ++NumMarked;
+ // Try to optimize globals based on the knowledge that only one value
+ // (besides its initializer) is ever stored to the global.
+ if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
+ getAnalysisIfAvailable<TargetData>()))
return true;
- } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
- if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
- GVI = FirstNewGV; // Don't skip the newly produced globals!
- return true;
- }
- } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
- // If the initial value for the global was an undef value, and if only
- // one other value was stored into it, we can just change the
- // initializer to be the stored value, then delete all stores to the
- // global. This allows us to mark it constant.
- if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
- if (isa<UndefValue>(GV->getInitializer())) {
- // Change the initial value here.
- GV->setInitializer(SOVConstant);
-
- // Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer());
-
- if (GV->use_empty()) {
- DEBUG(dbgs() << " *** Substituting initializer allowed us to "
- << "simplify all users and delete global!\n");
- GV->eraseFromParent();
- ++NumDeleted;
- } else {
- GVI = GV;
- }
- ++NumSubstitute;
- return true;
- }
- // Try to optimize globals based on the knowledge that only one value
- // (besides its initializer) is ever stored to the global.
- if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
- getAnalysisIfAvailable<TargetData>()))
+ // Otherwise, if the global was not a boolean, we can shrink it to be a
+ // boolean.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ ++NumShrunkToBool;
return true;
-
- // Otherwise, if the global was not a boolean, we can shrink it to be a
- // boolean.
- if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
- if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
- ++NumShrunkToBool;
- return true;
- }
- }
+ }
}
+
return false;
}
@@ -1917,10 +1934,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
if (New && New != CE)
GV->setInitializer(New);
}
- // Do more involved optimizations if the global is internal.
- if (!GV->isConstant() && GV->hasLocalLinkage() &&
- GV->hasInitializer())
- Changed |= ProcessInternalGlobal(GV, GVI);
+
+ Changed |= ProcessGlobal(GV, GVI);
}
return Changed;
}
@@ -1928,46 +1943,47 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
/// initializers have an init priority of 65535.
GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (I->getName() == "llvm.global_ctors") {
- // Found it, verify it's an array of { int, void()* }.
- const ArrayType *ATy =dyn_cast<ArrayType>(I->getType()->getElementType());
- if (!ATy) return 0;
- const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
- if (!STy || STy->getNumElements() != 2 ||
- !STy->getElementType(0)->isIntegerTy(32)) return 0;
- const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
- if (!PFTy) return 0;
- const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
- if (!FTy || !FTy->getReturnType()->isVoidTy() ||
- FTy->isVarArg() || FTy->getNumParams() != 0)
- return 0;
-
- // Verify that the initializer is simple enough for us to handle.
- if (!I->hasDefinitiveInitializer()) return 0;
- ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer());
- if (!CA) return 0;
- for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) {
- if (isa<ConstantPointerNull>(CS->getOperand(1)))
- continue;
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (GV == 0) return 0;
+
+ // Found it, verify it's an array of { int, void()* }.
+ const ArrayType *ATy =dyn_cast<ArrayType>(GV->getType()->getElementType());
+ if (!ATy) return 0;
+ const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
+ if (!STy || STy->getNumElements() != 2 ||
+ !STy->getElementType(0)->isIntegerTy(32)) return 0;
+ const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
+ if (!PFTy) return 0;
+ const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
+ if (!FTy || !FTy->getReturnType()->isVoidTy() ||
+ FTy->isVarArg() || FTy->getNumParams() != 0)
+ return 0;
- // Must have a function or null ptr.
- if (!isa<Function>(CS->getOperand(1)))
- return 0;
-
- // Init priority must be standard.
- ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
- if (!CI || CI->getZExtValue() != 65535)
- return 0;
- } else {
- return 0;
- }
-
- return I;
- }
- return 0;
+ // Verify that the initializer is simple enough for us to handle. We are
+ // only allowed to optimize the initializer if it is unique.
+ if (!GV->hasUniqueInitializer()) return 0;
+
+ ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!CA) return 0;
+
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(*i);
+ if (CS == 0) return 0;
+
+ if (isa<ConstantPointerNull>(CS->getOperand(1)))
+ continue;
+
+ // Must have a function or null ptr.
+ if (!isa<Function>(CS->getOperand(1)))
+ return 0;
+
+ // Init priority must be standard.
+ ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
+ if (!CI || CI->getZExtValue() != 65535)
+ return 0;
+ }
+
+ return GV;
}
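For reference, the only list shape this verifier accepts looks like the following (textual IR sketched from the checks above: element type { i32, void ()* }, every priority exactly 65535, every pointer a function or null, and a unique initializer):

    // @llvm.global_ctors = appending global [2 x { i32, void ()* }] [
    //   { i32, void ()* } { i32 65535, void ()* @ctor_a },
    //   { i32, void ()* } { i32 65535, void ()* null } ]
    // Anything else -- a different struct arity, a non-i32 or non-65535
    // priority field, or a non-unique initializer -- makes FindGlobalCtors
    // return 0 and the list is left untouched.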
/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
@@ -1985,13 +2001,13 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
/// specified array, returning the new global to use.
-static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
+static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
const std::vector<Function*> &Ctors) {
// If we made a change, reassemble the initializer list.
std::vector<Constant*> CSVals;
CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535));
CSVals.push_back(0);
-
+
// Create the new init list.
std::vector<Constant*> CAList;
for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
@@ -2007,26 +2023,26 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
}
CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
}
-
+
// Create the array initializer.
const Type *StructTy =
cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
- Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
+ Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
CAList.size()), CAList);
-
+
// If we didn't change the number of elements, don't create a new GV.
if (CA->getType() == GCL->getInitializer()->getType()) {
GCL->setInitializer(CA);
return GCL;
}
-
+
// Create the new global and insert it next to the existing list.
GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
GCL->getLinkage(), CA, "",
GCL->isThreadLocal());
GCL->getParent()->getGlobalList().insert(GCL, NGV);
NGV->takeName(GCL);
-
+
// Nuke the old list, replacing any uses with the new one.
if (!GCL->use_empty()) {
Constant *V = NGV;
@@ -2035,7 +2051,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
GCL->replaceAllUsesWith(V);
}
GCL->eraseFromParent();
-
+
if (Ctors.size())
return NGV;
else
@@ -2043,17 +2059,86 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
}
-static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues,
- Value *V) {
+static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, Value *V) {
if (Constant *CV = dyn_cast<Constant>(V)) return CV;
Constant *R = ComputedValues[V];
assert(R && "Reference to an uncomputed value!");
return R;
}
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants);
+
+
+/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
+/// handled by the code generator. We don't want to generate something like:
+/// void *X = &X/42;
+/// because the code generator doesn't have a relocation that can handle that.
+///
+/// This function should be called if C was not found (but just got inserted)
+/// in SimpleConstants to avoid having to rescan the same constants all the
+/// time.
+static bool isSimpleEnoughValueToCommitHelper(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants) {
+ // Simple integer, undef, constant aggregate zero, global addresses, etc are
+ // all supported.
+ if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
+ isa<GlobalValue>(C))
+ return true;
+
+ // Aggregate values are safe if all their elements are.
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
+ isa<ConstantVector>(C)) {
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ Constant *Op = cast<Constant>(C->getOperand(i));
+ if (!isSimpleEnoughValueToCommit(Op, SimpleConstants))
+ return false;
+ }
+ return true;
+ }
+
+ // We don't know exactly what relocations are allowed in constant expressions,
+ // so we allow &global+constantoffset, which is safe and uniformly supported
+ // across targets.
+ ConstantExpr *CE = cast<ConstantExpr>(C);
+ switch (CE->getOpcode()) {
+ case Instruction::BitCast:
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // These casts are always fine if the casted value is.
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+
+ // GEP is fine if it is simple + constant offset.
+ case Instruction::GetElementPtr:
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!isa<ConstantInt>(CE->getOperand(i)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+
+ case Instruction::Add:
+ // We allow simple+cst.
+ if (!isa<ConstantInt>(CE->getOperand(1)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+ }
+ return false;
+}
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants) {
+ // If we already checked this constant, we win.
+ if (!SimpleConstants.insert(C)) return true;
+ // Check the constant.
+ return isSimpleEnoughValueToCommitHelper(C, SimpleConstants);
+}
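The insert-first idiom above deserves a remark: SmallPtrSet::insert returns false when the constant was already present, and a repeat visit is simply treated as "already proven simple". That is sound only because a false answer aborts the whole evaluation, so a constant can never be re-queried after failing. A self-contained sketch of the same pattern with standard containers (toy Expr type, invented here):

    #include <unordered_set>
    #include <vector>

    struct Expr {
      std::vector<const Expr *> Ops;  // operands, empty for leaves
      bool LeafSimple;                // verdict for leaf nodes
    };

    static bool isSimple(const Expr *E,
                         std::unordered_set<const Expr *> &Seen) {
      if (!Seen.insert(E).second)
        return true;                  // seen before: previously proven simple
      if (E->Ops.empty())
        return E->LeafSimple;
      for (const Expr *Op : E->Ops)
        if (!isSimple(Op, Seen))
          return false;               // caller bails out entirely
      return true;
    }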
+
+
/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
-/// enough for us to understand. In particular, if it is a cast of something,
-/// we punt. We basically just support direct accesses to globals and GEP's of
+/// enough for us to understand. In particular, if it is a cast other than
+/// from one pointer type to another pointer type, we punt.
+/// We basically just support direct accesses to globals and GEP's of
/// globals. This should be kept up to date with CommitValueTo.
static bool isSimpleEnoughPointerToCommit(Constant *C) {
// Conservatively, avoid aggregate types. This is because we don't
@@ -2062,19 +2147,19 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
- // Do not allow weak/linkonce/dllimport/dllexport linkage or
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
// external globals.
- return GV->hasDefinitiveInitializer();
+ return GV->hasUniqueInitializer();
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
// Handle a constantexpr gep.
if (CE->getOpcode() == Instruction::GetElementPtr &&
isa<GlobalVariable>(CE->getOperand(0)) &&
cast<GEPOperator>(CE)->isInBounds()) {
GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
- // Do not allow weak/linkonce/dllimport/dllexport linkage or
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
// external globals.
- if (!GV->hasDefinitiveInitializer())
+ if (!GV->hasUniqueInitializer())
return false;
// The first index must be zero.
@@ -2087,7 +2172,18 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+
+ // A constantexpr bitcast from a pointer to another pointer is a no-op,
+ // and we know how to evaluate it by moving the bitcast from the pointer
+ // operand to the value operand.
+ } else if (CE->getOpcode() == Instruction::BitCast &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
}
+ }
+
return false;
}
@@ -2101,7 +2197,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
assert(Val->getType() == Init->getType() && "Type mismatch!");
return Val;
}
-
+
std::vector<Constant*> Elts;
if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
@@ -2119,13 +2215,13 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
llvm_unreachable("This code is out of sync with "
" ConstantFoldLoadThroughGEPConstantExpr");
}
-
+
// Replace the element that we are supposed to.
ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
unsigned Idx = CU->getZExtValue();
assert(Idx < STy->getNumElements() && "Struct index out of range!");
Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
-
+
// Return the modified struct.
return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(),
STy->isPacked());
@@ -2138,8 +2234,8 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
NumElts = ATy->getNumElements();
else
NumElts = cast<VectorType>(InitTy)->getNumElements();
-
-
+
+
// Break up the array into elements.
if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
@@ -2154,16 +2250,15 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
" ConstantFoldLoadThroughGEPConstantExpr");
Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
}
-
+
assert(CI->getZExtValue() < NumElts);
Elts[CI->getZExtValue()] =
EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
-
+
if (Init->getType()->isArrayTy())
return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
- else
- return ConstantVector::get(&Elts[0], Elts.size());
- }
+ return ConstantVector::get(Elts);
+ }
}
/// CommitValueTo - We have decided that Addr (which satisfies the predicate
@@ -2189,14 +2284,14 @@ static Constant *ComputeLoadResult(Constant *P,
// is the most up-to-date.
DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
if (I != Memory.end()) return I->second;
-
+
// Access it.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
if (GV->hasDefinitiveInitializer())
return GV->getInitializer();
return 0;
}
-
+
// Handle a constantexpr getelementptr.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
if (CE->getOpcode() == Instruction::GetElementPtr &&
@@ -2216,17 +2311,19 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
const SmallVectorImpl<Constant*> &ActualArgs,
std::vector<Function*> &CallStack,
DenseMap<Constant*, Constant*> &MutatedMemory,
- std::vector<GlobalVariable*> &AllocaTmps) {
+ std::vector<GlobalVariable*> &AllocaTmps,
+ SmallPtrSet<Constant*, 8> &SimpleConstants,
+ const TargetData *TD) {
// Check to see if this function is already executing (recursion). If so,
// bail out. TODO: we might want to accept limited recursion.
if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
return false;
-
+
CallStack.push_back(F);
-
+
/// Values - As we compute SSA register values, we store their contents here.
DenseMap<Value*, Constant*> Values;
-
+
// Initialize arguments to the incoming values specified.
unsigned ArgNo = 0;
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
@@ -2237,21 +2334,65 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
/// we can only evaluate any one basic block at most once. This set keeps
/// track of what we have executed so we can detect recursive cases etc.
SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
-
+
// CurInst - The current instruction we're evaluating.
BasicBlock::iterator CurInst = F->begin()->begin();
-
+
// This is the main evaluation loop.
while (1) {
Constant *InstResult = 0;
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
if (SI->isVolatile()) return false; // no volatile accesses.
Constant *Ptr = getVal(Values, SI->getOperand(1));
if (!isSimpleEnoughPointerToCommit(Ptr))
// If this is too complex for us to commit, reject it.
return false;
+
Constant *Val = getVal(Values, SI->getOperand(0));
+
+ // If this might be too difficult for the backend to handle (e.g. the addr
+ // of one global variable divided by another) then we can't commit it.
+ if (!isSimpleEnoughValueToCommit(Val, SimpleConstants))
+ return false;
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->getOpcode() == Instruction::BitCast) {
+ // If we're evaluating a store through a bitcast, then we need
+ // to pull the bitcast off the pointer type and push it onto the
+ // stored value.
+ Ptr = CE->getOperand(0);
+
+ const Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType();
+
+ // In order to push the bitcast onto the stored value, a bitcast
+ // from NewTy to Val's type must be legal. If it's not, we can try
+ // introspecting NewTy to find a legal conversion.
+ while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
+ // If NewTy is a struct, we can convert the pointer to the struct
+ // into a pointer to its first member.
+ // FIXME: This could be extended to support arrays as well.
+ if (const StructType *STy = dyn_cast<StructType>(NewTy)) {
+ NewTy = STy->getTypeAtIndex(0U);
+
+ const IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32);
+ Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
+ Constant * const IdxList[] = {IdxZero, IdxZero};
+
+ Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList, 2);
+
+ // If we can't improve the situation by introspecting NewTy,
+ // we have to give up.
+ } else {
+            return false;
+ }
+ }
+
+ // If we found compatible types, go ahead and push the bitcast
+ // onto the stored value.
+ Val = ConstantExpr::getBitCast(Val, NewTy);
+ }
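+
+      // Editorial note (not in the original patch): e.g. a store of
+      // i8* %v through i8** bitcast (%T** @G to i8**) becomes a store of
+      // %T* bitcast (i8* %v to %T*) through @G itself; when that value
+      // cast is illegal and the new pointee is a struct, Ptr is first
+      // narrowed to the struct's first member with a constant (0,0) GEP
+      // and the legality check repeats.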
+
MutatedMemory[Ptr] = Val;
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
InstResult = ConstantExpr::get(BO->getOpcode(),
@@ -2290,7 +2431,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
GlobalValue::InternalLinkage,
UndefValue::get(Ty),
AI->getName()));
- InstResult = AllocaTmps.back();
+ InstResult = AllocaTmps.back();
} else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) {
// Debug info can safely be ignored here.
@@ -2324,11 +2465,11 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
} else {
if (Callee->getFunctionType()->isVarArg())
return false;
-
+
Constant *RetVal;
// Execute the call, if successful, use the return value.
if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
- MutatedMemory, AllocaTmps))
+ MutatedMemory, AllocaTmps, SimpleConstants, TD))
return false;
InstResult = RetVal;
}
@@ -2342,7 +2483,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
dyn_cast<ConstantInt>(getVal(Values, BI->getCondition()));
if (!Cond) return false; // Cannot determine.
- NewBB = BI->getSuccessor(!Cond->getZExtValue());
+ NewBB = BI->getSuccessor(!Cond->getZExtValue());
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
ConstantInt *Val =
@@ -2358,20 +2499,20 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) {
if (RI->getNumOperands())
RetVal = getVal(Values, RI->getOperand(0));
-
+
CallStack.pop_back(); // return from fn.
return true; // We succeeded at evaluating this ctor!
} else {
// invoke, unwind, unreachable.
return false; // Cannot handle this terminator.
}
-
+
// Okay, we succeeded in evaluating this control flow. See if we have
// executed the new block before. If so, we have a looping function,
// which we cannot evaluate in reasonable time.
if (!ExecutedBlocks.insert(NewBB))
return false; // looped!
-
+
// Okay, we have never been in this block before. Check to see if there
// are any PHI nodes. If so, evaluate them with information about where
// we came from.
@@ -2387,10 +2528,14 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
// Did not know how to evaluate this!
return false;
}
-
- if (!CurInst->use_empty())
+
+ if (!CurInst->use_empty()) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
+ InstResult = ConstantFoldConstantExpression(CE, TD);
+
Values[CurInst] = InstResult;
-
+ }
+
// Advance program counter.
++CurInst;
}
@@ -2398,7 +2543,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
/// we can. Return true if we can, false otherwise.
-static bool EvaluateStaticConstructor(Function *F) {
+static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
/// MutatedMemory - For each store we execute, we update this map. Loads
/// check this to get the most up-to-date value. If evaluation is successful,
/// this state is committed to the process.
@@ -2408,17 +2553,23 @@ static bool EvaluateStaticConstructor(Function *F) {
/// to represent its body. This vector is needed so we can delete the
/// temporary globals when we are done.
std::vector<GlobalVariable*> AllocaTmps;
-
+
/// CallStack - This is used to detect recursion. In pathological situations
/// we could hit exponential behavior, but at least there is nothing
/// unbounded.
std::vector<Function*> CallStack;
+ /// SimpleConstants - These are constants we have checked and know to be
+ /// simple enough to live in a static initializer of a global.
+ SmallPtrSet<Constant*, 8> SimpleConstants;
+
// Call the function.
Constant *RetValDummy;
bool EvalSuccess = EvaluateFunction(F, RetValDummy,
SmallVector<Constant*, 0>(), CallStack,
- MutatedMemory, AllocaTmps);
+ MutatedMemory, AllocaTmps,
+ SimpleConstants, TD);
+
if (EvalSuccess) {
// We succeeded at evaluation: commit the result.
DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
@@ -2428,13 +2579,13 @@ static bool EvaluateStaticConstructor(Function *F) {
E = MutatedMemory.end(); I != E; ++I)
CommitValueTo(I->second, I->first);
}
-
+
// At this point, we are done interpreting. If we created any 'alloca'
// temporaries, release them now.
while (!AllocaTmps.empty()) {
GlobalVariable *Tmp = AllocaTmps.back();
AllocaTmps.pop_back();
-
+
// If there are still users of the alloca, the program is doing something
// silly, e.g. storing the address of the alloca somewhere and using it
// later. Since this is undefined, we'll just make it be null.
@@ -2442,7 +2593,7 @@ static bool EvaluateStaticConstructor(Function *F) {
Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
delete Tmp;
}
-
+
return EvalSuccess;
}
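To make the evaluator's limits concrete: a ctor like the one below is fully foldable (straight-line code, stores only through simple global pointers), while adding a loop (a block would execute twice), a volatile access, or a call to an external function makes EvaluateFunction bail. Illustrative C++, not a test case from the patch:

    static int Table[4];

    // Foldable: every block executes at most once and every store target
    // is a plain global, so the whole body becomes Table's initializer.
    static void initTable() {
      Table[0] = 0;
      Table[1] = 1;
      Table[2] = 4;
      Table[3] = 9;
    }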
@@ -2454,7 +2605,8 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
bool MadeChange = false;
if (Ctors.empty()) return false;
-
+
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
// Loop over global ctors, optimizing them when we can.
for (unsigned i = 0; i != Ctors.size(); ++i) {
Function *F = Ctors[i];
@@ -2467,12 +2619,12 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
}
break;
}
-
+
// We cannot simplify external ctor functions.
if (F->empty()) continue;
-
+
// If we can evaluate the ctor at compile time, do.
- if (EvaluateStaticConstructor(F)) {
+ if (EvaluateStaticConstructor(F, TD)) {
Ctors.erase(Ctors.begin()+i);
MadeChange = true;
--i;
@@ -2480,9 +2632,9 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
continue;
}
}
-
+
if (!MadeChange) return false;
-
+
GCL = InstallGlobalCtors(GCL, Ctors);
return true;
}
@@ -2546,21 +2698,21 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
bool GlobalOpt::runOnModule(Module &M) {
bool Changed = false;
-
+
  // Try to find the llvm.global_ctors list.
GlobalVariable *GlobalCtors = FindGlobalCtors(M);
bool LocalChange = true;
while (LocalChange) {
LocalChange = false;
-
+
// Delete functions that are trivially dead, ccc -> fastcc
LocalChange |= OptimizeFunctions(M);
-
+
// Optimize global_ctors list.
if (GlobalCtors)
LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
-
+
// Optimize non-address-taken globals.
LocalChange |= OptimizeGlobalVars(M);
@@ -2568,9 +2720,9 @@ bool GlobalOpt::runOnModule(Module &M) {
LocalChange |= OptimizeGlobalAliases(M);
Changed |= LocalChange;
}
-
+
// TODO: Move all global ctors functions to the end of the module for code
// layout.
-
+
return Changed;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
index 1b3cf78..c7c2939 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -35,7 +35,9 @@ namespace {
///
struct IPCP : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- IPCP() : ModulePass(ID) {}
+ IPCP() : ModulePass(ID) {
+ initializeIPCPPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M);
private:
@@ -46,7 +48,7 @@ namespace {
char IPCP::ID = 0;
INITIALIZE_PASS(IPCP, "ipconstprop",
- "Interprocedural constant propagation", false, false);
+ "Interprocedural constant propagation", false, false)
ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index 340b70e..fbe90ce 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -7,17 +7,51 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the C bindings for libLLVMIPO.a, which implements
-// several transformations over the LLVM intermediate representation.
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMIPO.a, which implements several transformations over the LLVM
+// intermediate representation.
//
//===----------------------------------------------------------------------===//
#include "llvm-c/Transforms/IPO.h"
+#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
+void llvm::initializeIPO(PassRegistry &Registry) {
+ initializeArgPromotionPass(Registry);
+ initializeConstantMergePass(Registry);
+ initializeDAEPass(Registry);
+ initializeDAHPass(Registry);
+ initializeDTEPass(Registry);
+ initializeFunctionAttrsPass(Registry);
+ initializeGlobalDCEPass(Registry);
+ initializeGlobalOptPass(Registry);
+ initializeIPCPPass(Registry);
+ initializeAlwaysInlinerPass(Registry);
+ initializeSimpleInlinerPass(Registry);
+ initializeInternalizePassPass(Registry);
+ initializeLoopExtractorPass(Registry);
+ initializeBlockExtractorPassPass(Registry);
+ initializeSingleLoopExtractorPass(Registry);
+ initializeLowerSetJmpPass(Registry);
+ initializeMergeFunctionsPass(Registry);
+ initializePartialInlinerPass(Registry);
+ initializePruneEHPass(Registry);
+ initializeStripDeadPrototypesPassPass(Registry);
+ initializeStripSymbolsPass(Registry);
+ initializeStripDebugDeclarePass(Registry);
+ initializeStripDeadDebugInfoPass(Registry);
+ initializeStripNonDebugSymbolsPass(Registry);
+ initializeSRETPromotionPass(Registry);
+}
+
+void LLVMInitializeIPO(LLVMPassRegistryRef R) {
+ initializeIPO(*unwrap(R));
+}
+
void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createArgumentPromotionPass());
}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
index ecc60ad..ce795b7 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -36,7 +36,9 @@ namespace {
InlineCostAnalyzer CA;
public:
// Use extremely low threshold.
- AlwaysInliner() : Inliner(ID, -2000000000) {}
+ AlwaysInliner() : Inliner(ID, -2000000000) {
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+ }
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
return CA.getInlineCost(CS, NeverInline);
@@ -61,8 +63,11 @@ namespace {
}
char AlwaysInliner::ID = 0;
-INITIALIZE_PASS(AlwaysInliner, "always-inline",
- "Inliner for always_inline functions", false, false);
+INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false)
Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
index 9c6637d..0c5b3be 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -33,8 +33,12 @@ namespace {
SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
public:
- SimpleInliner() : Inliner(ID) {}
- SimpleInliner(int Threshold) : Inliner(ID, Threshold) {}
+ SimpleInliner() : Inliner(ID) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+ SimpleInliner(int Threshold) : Inliner(ID, Threshold) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
return CA.getInlineCost(CS, NeverInline);
@@ -56,8 +60,11 @@ namespace {
}
char SimpleInliner::ID = 0;
-INITIALIZE_PASS(SimpleInliner, "inline",
- "Function Integration/Inlining", false, false);
+INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false)
Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 4983e8e..37eafd7 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -52,7 +52,8 @@ Inliner::Inliner(char &ID)
: CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
Inliner::Inliner(char &ID, int Threshold)
- : CallGraphSCCPass(ID), InlineThreshold(Threshold) {}
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ?
+ InlineLimit : Threshold) {}
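The new initializer gives an explicit -inline-threshold flag priority over the threshold a constructor caller passes in. In isolation the precedence rule is the following (sketch; pickThreshold is an invented helper, and cl::opt / getNumOccurrences() come from llvm/Support/CommandLine.h):

    // getNumOccurrences() > 0 means the user actually passed the flag,
    // so it wins; otherwise the programmatic Threshold argument is used.
    static int pickThreshold(const llvm::cl::opt<int> &InlineLimit,
                             int Threshold) {
      return InlineLimit.getNumOccurrences() > 0 ? InlineLimit : Threshold;
    }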
/// getAnalysisUsage - For this class, we declare that we require and preserve
/// the call graph. If the derived class implements this method, it should
@@ -74,7 +75,8 @@ InlinedArrayAllocasTy;
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
- InlinedArrayAllocasTy &InlinedArrayAllocas) {
+ InlinedArrayAllocasTy &InlinedArrayAllocas,
+ int InlineHistory) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
@@ -91,7 +93,6 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
!Caller->hasFnAttr(Attribute::StackProtectReq))
Caller->addFnAttr(Attribute::StackProtect);
-
// Look at all of the allocas that we inlined through this call site. If we
// have already inlined other allocas through other calls into this function,
// then we know that they have disjoint lifetimes and that we can merge them.
@@ -115,6 +116,21 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
//
SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+ // When processing our SCC, check to see if CS was inlined from some other
+ // call site. For example, if we're processing "A" in this code:
+ // A() { B() }
+ // B() { x = alloca ... C() }
+ // C() { y = alloca ... }
+ // Assume that C was not inlined into B initially, and so we're processing A
+ // and decide to inline B into A. Doing this makes an alloca available for
+ // reuse and makes a callsite (C) available for inlining. When we process
+  // the C call site we don't want to do any alloca merging between x and y
+ // because their scopes are not disjoint. We could make this smarter by
+ // keeping track of the inline history for each alloca in the
+ // InlinedArrayAllocas but this isn't likely to be a significant win.
+ if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
+ return true;
+
// Loop over all the allocas we have so far and see if they can be merged with
// a previously inlined alloca. If not, remember that we had it.
for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
@@ -152,19 +168,21 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
// Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
// success!
- DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI);
+ DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
+ << *AvailableAlloca << '\n');
AI->replaceAllUsesWith(AvailableAlloca);
AI->eraseFromParent();
MergedAwayAlloca = true;
++NumMergedAllocas;
+ IFI.StaticAllocas[AllocaNo] = 0;
break;
}
// If we already nuked the alloca, we're done with it.
if (MergedAwayAlloca)
continue;
-
+
// If we were unable to merge away the alloca either because there are no
// allocas of the right type available or because we reused them all
// already, remember that this alloca came from an inlined function and mark
@@ -234,20 +252,25 @@ bool Inliner::shouldInline(CallSite CS) {
if (Caller->hasLocalLinkage()) {
int TotalSecondaryCost = 0;
bool outerCallsFound = false;
- bool allOuterCallsWillBeInlined = true;
- bool someOuterCallWouldNotBeInlined = false;
+ // This bool tracks what happens if we do NOT inline C into B.
+ bool callerWillBeRemoved = true;
+ // This bool tracks what happens if we DO inline C into B.
+ bool inliningPreventsSomeOuterInline = false;
for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
I != E; ++I) {
CallSite CS2(*I);
// If this isn't a call to Caller (it could be some other sort
- // of reference) skip it.
- if (!CS2 || CS2.getCalledFunction() != Caller)
+ // of reference) skip it. Such references will prevent the caller
+ // from being removed.
+ if (!CS2 || CS2.getCalledFunction() != Caller) {
+ callerWillBeRemoved = false;
continue;
+ }
InlineCost IC2 = getInlineCost(CS2);
if (IC2.isNever())
- allOuterCallsWillBeInlined = false;
+ callerWillBeRemoved = false;
if (IC2.isAlways() || IC2.isNever())
continue;
@@ -257,14 +280,14 @@ bool Inliner::shouldInline(CallSite CS) {
float FudgeFactor2 = getInlineFudgeFactor(CS2);
if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
- allOuterCallsWillBeInlined = false;
+ callerWillBeRemoved = false;
// See if we have this case. We subtract off the penalty
// for the call instruction, which we would be deleting.
if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
Cost2 + Cost - (InlineConstants::CallPenalty + 1) >=
(int)(CurrentThreshold2 * FudgeFactor2)) {
- someOuterCallWouldNotBeInlined = true;
+ inliningPreventsSomeOuterInline = true;
TotalSecondaryCost += Cost2;
}
}
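A worked instance of this bookkeeping may help (numbers invented; assumes InlineConstants::CallPenalty is 25, its value in this era's InlineCost.h). Say C-into-B has Cost 120, B has two outer callers whose adjusted threshold (CurrentThreshold2 * FudgeFactor2) is 100, and B's own cost at each outer site is Cost2 = 60:

    // Per outer site:  60 < 100              -> site currently inlinable
    //                  60 + 120 - 26 >= 100  -> inlining C would block it
    // Both sites set inliningPreventsSomeOuterInline and contribute Cost2,
    // so TotalSecondaryCost = 120.  Every outer call would have been
    // inlined, so callerWillBeRemoved stays true and the (negative)
    // LastCallToStaticBonus is added below, making TotalSecondaryCost <
    // Cost; the pass declines to inline C into B, preferring to keep B
    // inlinable at its callers and then deletable.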
@@ -272,10 +295,10 @@ bool Inliner::shouldInline(CallSite CS) {
// one is set very low by getInlineCost, in anticipation that Caller will
// be removed entirely. We did not account for this above unless there
// is only one caller of Caller.
- if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end())
+ if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end())
TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
- if (outerCallsFound && someOuterCallWouldNotBeInlined &&
+ if (outerCallsFound && inliningPreventsSomeOuterInline &&
TotalSecondaryCost < Cost) {
DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
" Cost = " << Cost <<
@@ -401,7 +424,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// If this call site was obtained by inlining another function, verify
// that the include path for the function did not include the callee
- // itself. If so, we'd be recursively inlinling the same function,
+ // itself. If so, we'd be recursively inlining the same function,
// which would provide the same callsites, which would cause us to
// infinitely inline.
int InlineHistoryID = CallSites[CSi].second;
@@ -416,7 +439,8 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
continue;
// Attempt to inline the function.
- if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas))
+ if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
+ InlineHistoryID))
continue;
++NumInlined;
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
index a1d919f..9b9ebad 100644
--- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -64,10 +64,11 @@ namespace {
char InternalizePass::ID = 0;
INITIALIZE_PASS(InternalizePass, "internalize",
- "Internalize Global Symbols", false, false);
+ "Internalize Global Symbols", false, false)
InternalizePass::InternalizePass(bool AllButMain)
: ModulePass(ID), AllButMain(AllButMain){
+ initializeInternalizePassPass(*PassRegistry::getPassRegistry());
if (!APIFile.empty()) // If a filename is specified, use it.
LoadFile(APIFile.c_str());
if (!APIList.empty()) // If a list is specified, use it as well.
@@ -76,6 +77,7 @@ InternalizePass::InternalizePass(bool AllButMain)
InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
: ModulePass(ID), AllButMain(false){
+ initializeInternalizePassPass(*PassRegistry::getPassRegistry());
for(std::vector<const char *>::const_iterator itr = exportList.begin();
itr != exportList.end(); itr++) {
ExternalNames.insert(*itr);
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index f88dff6..848944d 100644
--- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -37,7 +37,9 @@ namespace {
unsigned NumLoops;
explicit LoopExtractor(unsigned numLoops = ~0)
- : LoopPass(ID), NumLoops(numLoops) {}
+ : LoopPass(ID), NumLoops(numLoops) {
+ initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -50,8 +52,13 @@ namespace {
}
char LoopExtractor::ID = 0;
-INITIALIZE_PASS(LoopExtractor, "loop-extract",
- "Extract loops into new functions", false, false);
+INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false)
namespace {
/// SingleLoopExtractor - For bugpoint.
@@ -63,7 +70,7 @@ namespace {
char SingleLoopExtractor::ID = 0;
INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
- "Extract at most one loop into a new function", false, false);
+ "Extract at most one loop into a new function", false, false)
// createLoopExtractorPass - This pass extracts all natural loops from the
// program into a function if it can.
@@ -159,7 +166,7 @@ namespace {
char BlockExtractorPass::ID = 0;
INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
"Extract Basic Blocks From Module (for bugpoint use)",
- false, false);
+ false, false)
// createBlockExtractorPass - This pass extracts all blocks (except those
// specified in the argument list) from the functions in the module.
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
index 6c715de..b545f0b 100644
--- a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -109,7 +109,9 @@ namespace {
bool IsTransformableFunction(StringRef Name);
public:
static char ID; // Pass identification, replacement for typeid
- LowerSetJmp() : ModulePass(ID) {}
+ LowerSetJmp() : ModulePass(ID) {
+ initializeLowerSetJmpPass(*PassRegistry::getPassRegistry());
+ }
void visitCallInst(CallInst& CI);
void visitInvokeInst(InvokeInst& II);
@@ -122,7 +124,7 @@ namespace {
} // end anonymous namespace
char LowerSetJmp::ID = 0;
-INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false);
+INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false)
// run - Run the transformation on the program. We grab the function
// prototypes for longjmp and setjmp. If they are used in the program,
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 5d838f9..cccffca 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -67,42 +67,87 @@
using namespace llvm;
STATISTIC(NumFunctionsMerged, "Number of functions merged");
+STATISTIC(NumThunksWritten, "Number of thunks generated");
+STATISTIC(NumAliasesWritten, "Number of aliases generated");
+STATISTIC(NumDoubleWeak, "Number of new functions created");
+
+/// Creates a hash-code for the function which is the same for any two
+/// functions that will compare equal, without looking at the instructions
+/// inside the function.
+static unsigned profileFunction(const Function *F) {
+ const FunctionType *FTy = F->getFunctionType();
-namespace {
- /// MergeFunctions finds functions which will generate identical machine code,
- /// by considering all pointer types to be equivalent. Once identified,
- /// MergeFunctions will fold them by replacing a call to one to a call to a
- /// bitcast of the other.
- ///
- class MergeFunctions : public ModulePass {
- public:
- static char ID;
- MergeFunctions() : ModulePass(ID) {}
-
- bool runOnModule(Module &M);
-
- private:
- /// MergeTwoFunctions - Merge two equivalent functions. Upon completion, G
- /// may be deleted, or may be converted into a thunk. In either case, it
- /// should never be visited again.
- void MergeTwoFunctions(Function *F, Function *G) const;
-
- /// WriteThunk - Replace G with a simple tail call to bitcast(F). Also
- /// replace direct uses of G with bitcast(F).
- void WriteThunk(Function *F, Function *G) const;
-
- TargetData *TD;
- };
+ FoldingSetNodeID ID;
+ ID.AddInteger(F->size());
+ ID.AddInteger(F->getCallingConv());
+ ID.AddBoolean(F->hasGC());
+ ID.AddBoolean(FTy->isVarArg());
+ ID.AddInteger(FTy->getReturnType()->getTypeID());
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ ID.AddInteger(FTy->getParamType(i)->getTypeID());
+ return ID.ComputeHash();
}
-char MergeFunctions::ID = 0;
-INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false);
+namespace {
+
+/// ComparableFunction - A struct that pairs together functions with a
+/// TargetData so that we can keep them together as elements in the DenseSet.
+class ComparableFunction {
+public:
+ static const ComparableFunction EmptyKey;
+ static const ComparableFunction TombstoneKey;
+ static TargetData * const LookupOnly;
+
+ ComparableFunction(Function *Func, TargetData *TD)
+ : Func(Func), Hash(profileFunction(Func)), TD(TD) {}
+
+ Function *getFunc() const { return Func; }
+ unsigned getHash() const { return Hash; }
+ TargetData *getTD() const { return TD; }
+
+ // Drops AssertingVH reference to the function. Outside of debug mode, this
+ // does nothing.
+ void release() {
+ assert(Func &&
+ "Attempted to release function twice, or release empty/tombstone!");
+ Func = NULL;
+ }
+
+private:
+ explicit ComparableFunction(unsigned Hash)
+ : Func(NULL), Hash(Hash), TD(NULL) {}
+
+ AssertingVH<Function> Func;
+ unsigned Hash;
+ TargetData *TD;
+};
+
+const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
+const ComparableFunction ComparableFunction::TombstoneKey =
+ ComparableFunction(1);
+TargetData * const ComparableFunction::LookupOnly = (TargetData*)(-1);
-ModulePass *llvm::createMergeFunctionsPass() {
- return new MergeFunctions();
+}
+
+namespace llvm {
+ template <>
+ struct DenseMapInfo<ComparableFunction> {
+ static ComparableFunction getEmptyKey() {
+ return ComparableFunction::EmptyKey;
+ }
+ static ComparableFunction getTombstoneKey() {
+ return ComparableFunction::TombstoneKey;
+ }
+ static unsigned getHashValue(const ComparableFunction &CF) {
+ return CF.getHash();
+ }
+ static bool isEqual(const ComparableFunction &LHS,
+ const ComparableFunction &RHS);
+ };
}
namespace {
+
/// FunctionComparator - Compares two functions to determine whether or not
/// they will generate machine code with the same behaviour. TargetData is
/// used if available. The comparator always fails conservatively (erring on the
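
profileFunction together with the DenseMapInfo specialization implements a two-level comparison: a cheap structural hash buckets candidate functions, and the expensive FunctionComparator runs only on hash collisions. A standalone sketch of the same bucketing idea, with strings standing in for functions and string length as the deliberately coarse hash:

    #include <cassert>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // Coarse, collision-prone hash playing the role of profileFunction().
    static unsigned cheapHash(const std::string &F) {
      return static_cast<unsigned>(F.size());
    }

    int main() {
      std::unordered_map<unsigned, std::vector<std::string> > Buckets;
      const char *Funcs[] = {"abc", "abd", "abc", "xy"};
      unsigned Merged = 0;
      for (const char *F : Funcs) {
        std::vector<std::string> &B = Buckets[cheapHash(F)];
        bool Found = false;
        for (size_t i = 0; i != B.size(); ++i)
          if (B[i] == F) { ++Merged; Found = true; break; } // "deep" compare
        if (!Found)
          B.push_back(F);
      }
      assert(Merged == 1); // the second "abc" merges into the first
      return 0;
    }
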
@@ -111,34 +156,34 @@ class FunctionComparator {
public:
FunctionComparator(const TargetData *TD, const Function *F1,
const Function *F2)
- : F1(F1), F2(F2), TD(TD), IDMap1Count(0), IDMap2Count(0) {}
+ : F1(F1), F2(F2), TD(TD) {}
- /// Compare - test whether the two functions have equivalent behaviour.
- bool Compare();
+ /// Test whether the two functions have equivalent behaviour.
+ bool compare();
private:
- /// Compare - test whether two basic blocks have equivalent behaviour.
- bool Compare(const BasicBlock *BB1, const BasicBlock *BB2);
+ /// Test whether two basic blocks have equivalent behaviour.
+ bool compare(const BasicBlock *BB1, const BasicBlock *BB2);
- /// Enumerate - Assign or look up previously assigned numbers for the two
- /// values, and return whether the numbers are equal. Numbers are assigned in
- /// the order visited.
- bool Enumerate(const Value *V1, const Value *V2);
+ /// Assign or look up previously assigned numbers for the two values, and
+ /// return whether the numbers are equal. Numbers are assigned in the order
+ /// visited.
+ bool enumerate(const Value *V1, const Value *V2);
- /// isEquivalentOperation - Compare two Instructions for equivalence, similar
- /// to Instruction::isSameOperationAs but with modifications to the type
+ /// Compare two Instructions for equivalence, similar to
+ /// Instruction::isSameOperationAs but with modifications to the type
/// comparison.
bool isEquivalentOperation(const Instruction *I1,
const Instruction *I2) const;
- /// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic.
+ /// Compare two GEPs for equivalent pointer arithmetic.
bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
bool isEquivalentGEP(const GetElementPtrInst *GEP1,
const GetElementPtrInst *GEP2) {
return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
}
- /// isEquivalentType - Compare two Types, treating all pointer types as equal.
+ /// Compare two Types, treating all pointer types as equal.
bool isEquivalentType(const Type *Ty1, const Type *Ty2) const;
// The two functions undergoing comparison.
@@ -146,20 +191,26 @@ private:
const TargetData *TD;
- typedef DenseMap<const Value *, unsigned long> IDMap;
- IDMap Map1, Map2;
- unsigned long IDMap1Count, IDMap2Count;
+ DenseMap<const Value *, const Value *> id_map;
+ DenseSet<const Value *> seen_values;
};
+
}
-/// isEquivalentType - any two pointers in the same address space are
-/// equivalent. Otherwise, standard type equivalence rules apply.
+// Any two pointers in the same address space are equivalent; a pointer and
+// intptr_t are also equivalent. Otherwise, standard type equivalence rules
+// apply.
bool FunctionComparator::isEquivalentType(const Type *Ty1,
const Type *Ty2) const {
if (Ty1 == Ty2)
return true;
- if (Ty1->getTypeID() != Ty2->getTypeID())
+ if (Ty1->getTypeID() != Ty2->getTypeID()) {
+ if (TD) {
+ LLVMContext &Ctx = Ty1->getContext();
+ if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true;
+ if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true;
+ }
return false;
+ }
switch(Ty1->getTypeID()) {
default:
@@ -167,6 +218,7 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
// Fall through in Release mode.
case Type::IntegerTyID:
case Type::OpaqueTyID:
+ case Type::VectorTyID:
// Ty1 == Ty2 would have returned true earlier.
return false;
@@ -225,21 +277,18 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
return ATy1->getNumElements() == ATy2->getNumElements() &&
isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
}
-
- case Type::VectorTyID: {
- const VectorType *VTy1 = cast<VectorType>(Ty1);
- const VectorType *VTy2 = cast<VectorType>(Ty2);
- return VTy1->getNumElements() == VTy2->getNumElements() &&
- isEquivalentType(VTy1->getElementType(), VTy2->getElementType());
- }
}
}
-/// isEquivalentOperation - determine whether the two operations are the same
-/// except that pointer-to-A and pointer-to-B are equivalent. This should be
-/// kept in sync with Instruction::isSameOperationAs.
+// Determine whether the two operations are the same except that pointer-to-A
+// and pointer-to-B are equivalent. This should be kept in sync with
+// Instruction::isSameOperationAs.
bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
const Instruction *I2) const {
+ // Differences from Instruction::isSameOperationAs:
+ // * replace type comparison with calls to isEquivalentType.
+ // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
+ // * because of the above, we don't test for the tail bit on calls later on
if (I1->getOpcode() != I2->getOpcode() ||
I1->getNumOperands() != I2->getNumOperands() ||
!isEquivalentType(I1->getType(), I2->getType()) ||
@@ -263,14 +312,11 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
if (const CallInst *CI = dyn_cast<CallInst>(I1))
- return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() &&
- CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
- CI->getAttributes().getRawPointer() ==
- cast<CallInst>(I2)->getAttributes().getRawPointer();
+ return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
+ CI->getAttributes() == cast<CallInst>(I2)->getAttributes();
if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
- CI->getAttributes().getRawPointer() ==
- cast<InvokeInst>(I2)->getAttributes().getRawPointer();
+ CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes();
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) {
if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices())
return false;
@@ -291,8 +337,7 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
return true;
}
-/// isEquivalentGEP - determine whether two GEP operations perform the same
-/// underlying arithmetic.
+// Determine whether two GEP operations perform the same underlying arithmetic.
bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
const GEPOperator *GEP2) {
// When we have target data, we can reduce the GEP down to the value in bytes
@@ -315,17 +360,17 @@ bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
return false;
for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
- if (!Enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
+ if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
return false;
}
return true;
}
-/// Enumerate - Compare two values used by the two functions under pair-wise
-/// comparison. If this is the first time the values are seen, they're added to
-/// the mapping so that we will detect mismatches on next use.
-bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) {
+// Compare two values used by the two functions under pair-wise comparison. If
+// this is the first time the values are seen, they're added to the mapping so
+// that we will detect mismatches on next use.
+bool FunctionComparator::enumerate(const Value *V1, const Value *V2) {
// Check for function @f1 referring to itself and function @f2 referring to
// itself, or referring to each other, or both referring to either of them.
// They're all equivalent if the two functions are otherwise equivalent.
@@ -334,35 +379,44 @@ bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) {
if (V1 == F2 && V2 == F1)
return true;
- // TODO: constant expressions with GEP or references to F1 or F2.
- if (isa<Constant>(V1))
- return V1 == V2;
-
- if (isa<InlineAsm>(V1) && isa<InlineAsm>(V2)) {
- const InlineAsm *IA1 = cast<InlineAsm>(V1);
- const InlineAsm *IA2 = cast<InlineAsm>(V2);
- return IA1->getAsmString() == IA2->getAsmString() &&
- IA1->getConstraintString() == IA2->getConstraintString();
+ if (const Constant *C1 = dyn_cast<Constant>(V1)) {
+ if (V1 == V2) return true;
+ const Constant *C2 = dyn_cast<Constant>(V2);
+ if (!C2) return false;
+ // TODO: constant expressions with GEP or references to F1 or F2.
+ if (C1->isNullValue() && C2->isNullValue() &&
+ isEquivalentType(C1->getType(), C2->getType()))
+ return true;
+ // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1
+ // then they must have equal bit patterns.
+ return C1->getType()->canLosslesslyBitCastTo(C2->getType()) &&
+ C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType());
}
- unsigned long &ID1 = Map1[V1];
- if (!ID1)
- ID1 = ++IDMap1Count;
+ if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2))
+ return V1 == V2;
- unsigned long &ID2 = Map2[V2];
- if (!ID2)
- ID2 = ++IDMap2Count;
+  // Check that V1 maps to V2. If we find a value that V1 maps to, then we
+  // simply check whether it's equal to V2. If there is no mapping, we need to
+ // ensure that V2 isn't already equivalent to something else. For this
+ // purpose, we track the V2 values in a set.
- return ID1 == ID2;
+ const Value *&map_elem = id_map[V1];
+ if (map_elem)
+ return map_elem == V2;
+ if (!seen_values.insert(V2).second)
+ return false;
+ map_elem = V2;
+ return true;
}
-/// Compare - test whether two basic blocks have equivalent behaviour.
-bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
+// Test whether two basic blocks have equivalent behaviour.
+bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) {
BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();
do {
- if (!Enumerate(F1I, F2I))
+ if (!enumerate(F1I, F2I))
return false;
if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
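
The rewritten enumerate() replaces the old pair of counters with a direct V1->V2 map plus a set of already-claimed V2 values; the set is what makes the mapping one-to-one. A standalone sketch of the check, with ints in place of Values:

    #include <cassert>
    #include <map>
    #include <set>

    // enumerate()-style bijection check between two value streams: fails as
    // soon as the pairing stops being one-to-one.
    static bool enumeratePair(std::map<int, int> &IdMap, std::set<int> &Seen,
                              int V1, int V2) {
      std::map<int, int>::iterator It = IdMap.find(V1);
      if (It != IdMap.end())
        return It->second == V2;  // V1 already mapped: must map to V2
      if (!Seen.insert(V2).second)
        return false;             // V2 already claimed by a different V1
      IdMap[V1] = V2;
      return true;
    }

    int main() {
      std::map<int, int> IdMap;
      std::set<int> Seen;
      assert(enumeratePair(IdMap, Seen, 1, 10));
      assert(enumeratePair(IdMap, Seen, 1, 10));  // consistent reuse is fine
      assert(!enumeratePair(IdMap, Seen, 2, 10)); // 10 is already taken
      return 0;
    }
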
@@ -370,7 +424,7 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
if (!GEP2)
return false;
- if (!Enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
+ if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
return false;
if (!isEquivalentGEP(GEP1, GEP2))
@@ -384,7 +438,7 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
Value *OpF1 = F1I->getOperand(i);
Value *OpF2 = F2I->getOperand(i);
- if (!Enumerate(OpF1, OpF2))
+ if (!enumerate(OpF1, OpF2))
return false;
if (OpF1->getValueID() != OpF2->getValueID() ||
@@ -399,8 +453,8 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
return F1I == F1E && F2I == F2E;
}
-/// Compare - test whether the two functions have equivalent behaviour.
-bool FunctionComparator::Compare() {
+// Test whether the two functions have equivalent behaviour.
+bool FunctionComparator::compare() {
// We need to recheck everything, but check the things that weren't included
// in the hash first.
@@ -431,14 +485,14 @@ bool FunctionComparator::Compare() {
return false;
assert(F1->arg_size() == F2->arg_size() &&
- "Identical functions have a different number of args.");
+ "Identically typed functions have different numbers of args!");
// Visit the arguments so that they get enumerated in the order they're
// passed in.
for (Function::const_arg_iterator f1i = F1->arg_begin(),
f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
- if (!Enumerate(f1i, f2i))
- llvm_unreachable("Arguments repeat");
+ if (!enumerate(f1i, f2i))
+ llvm_unreachable("Arguments repeat!");
}
// We do a CFG-ordered walk since the actual ordering of the blocks in the
@@ -456,7 +510,7 @@ bool FunctionComparator::Compare() {
const BasicBlock *F1BB = F1BBs.pop_back_val();
const BasicBlock *F2BB = F2BBs.pop_back_val();
- if (!Enumerate(F1BB, F2BB) || !Compare(F1BB, F2BB))
+ if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB))
return false;
const TerminatorInst *F1TI = F1BB->getTerminator();
@@ -474,23 +528,190 @@ bool FunctionComparator::Compare() {
return true;
}
-/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace
-/// direct uses of G with bitcast(F).
-void MergeFunctions::WriteThunk(Function *F, Function *G) const {
+namespace {
+
+/// MergeFunctions finds functions which will generate identical machine code,
+/// by considering all pointer types to be equivalent. Once identified,
+/// MergeFunctions will fold them by replacing a call to one to a call to a
+/// bitcast of the other.
+///
+class MergeFunctions : public ModulePass {
+public:
+ static char ID;
+ MergeFunctions()
+ : ModulePass(ID), HasGlobalAliases(false) {
+ initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+
+private:
+ typedef DenseSet<ComparableFunction> FnSetType;
+
+ /// A work queue of functions that may have been modified and should be
+ /// analyzed again.
+ std::vector<WeakVH> Deferred;
+
+ /// Insert a ComparableFunction into the FnSet, or merge it away if it's
+ /// equal to one that's already present.
+ bool insert(ComparableFunction &NewF);
+
+ /// Remove a Function from the FnSet and queue it up for a second sweep of
+ /// analysis.
+ void remove(Function *F);
+
+ /// Find the functions that use this Value and remove them from FnSet and
+ /// queue the functions.
+ void removeUsers(Value *V);
+
+ /// Replace all direct calls of Old with calls of New. Will bitcast New if
+ /// necessary to make types match.
+ void replaceDirectCallers(Function *Old, Function *New);
+
+ /// Merge two equivalent functions. Upon completion, G may be deleted, or may
+ /// be converted into a thunk. In either case, it should never be visited
+ /// again.
+ void mergeTwoFunctions(Function *F, Function *G);
+
+ /// Replace G with a thunk or an alias to F. Deletes G.
+ void writeThunkOrAlias(Function *F, Function *G);
+
+ /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+ /// of G with bitcast(F). Deletes G.
+ void writeThunk(Function *F, Function *G);
+
+ /// Replace G with an alias to F. Deletes G.
+ void writeAlias(Function *F, Function *G);
+
+ /// The set of all distinct functions. Use the insert() and remove() methods
+ /// to modify it.
+ FnSetType FnSet;
+
+ /// TargetData for more accurate GEP comparisons. May be NULL.
+ TargetData *TD;
+
+ /// Whether or not the target supports global aliases.
+ bool HasGlobalAliases;
+};
+
+} // end anonymous namespace
+
+char MergeFunctions::ID = 0;
+INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
+
+ModulePass *llvm::createMergeFunctionsPass() {
+ return new MergeFunctions();
+}
+
+bool MergeFunctions::runOnModule(Module &M) {
+ bool Changed = false;
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ Deferred.push_back(WeakVH(I));
+ }
+ FnSet.resize(Deferred.size());
+
+ do {
+ std::vector<WeakVH> Worklist;
+ Deferred.swap(Worklist);
+
+ DEBUG(dbgs() << "size of module: " << M.size() << '\n');
+ DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
+
+ // Insert only strong functions and merge them. Strong function merging
+ // always deletes one of them.
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ if (!*I) continue;
+ Function *F = cast<Function>(*I);
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+ !F->mayBeOverridden()) {
+ ComparableFunction CF = ComparableFunction(F, TD);
+ Changed |= insert(CF);
+ }
+ }
+
+    // Insert only weak functions and merge them. By doing these second, we
+    // create thunks to the strong function when possible. When two weak
+    // functions are identical, we create a new strong function with two weak
+    // thunks to it which are identical but not mergeable.
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ if (!*I) continue;
+ Function *F = cast<Function>(*I);
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+ F->mayBeOverridden()) {
+ ComparableFunction CF = ComparableFunction(F, TD);
+ Changed |= insert(CF);
+ }
+ }
+ DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n');
+ } while (!Deferred.empty());
+
+ FnSet.clear();
+
+ return Changed;
+}
+
+bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
+ const ComparableFunction &RHS) {
+ if (LHS.getFunc() == RHS.getFunc() &&
+ LHS.getHash() == RHS.getHash())
+ return true;
+ if (!LHS.getFunc() || !RHS.getFunc())
+ return false;
+
+ // One of these is a special "underlying pointer comparison only" object.
+ if (LHS.getTD() == ComparableFunction::LookupOnly ||
+ RHS.getTD() == ComparableFunction::LookupOnly)
+ return false;
+
+ assert(LHS.getTD() == RHS.getTD() &&
+ "Comparing functions for different targets");
+
+ return FunctionComparator(LHS.getTD(), LHS.getFunc(),
+ RHS.getFunc()).compare();
+}
+
+// Replace direct callers of Old with New.
+void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
+ Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
+ for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
+ UI != UE;) {
+ Value::use_iterator TheIter = UI;
+ ++UI;
+ CallSite CS(*TheIter);
+ if (CS && CS.isCallee(TheIter)) {
+ remove(CS.getInstruction()->getParent()->getParent());
+ TheIter.getUse().set(BitcastNew);
+ }
+ }
+}
+
+// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
+void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
+ if (HasGlobalAliases && G->hasUnnamedAddr()) {
+ if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
+ G->hasWeakLinkage()) {
+ writeAlias(F, G);
+ return;
+ }
+ }
+
+ writeThunk(F, G);
+}
+
+// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+// of G with bitcast(F). Deletes G.
+void MergeFunctions::writeThunk(Function *F, Function *G) {
if (!G->mayBeOverridden()) {
// Redirect direct callers of G to F.
- Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
- for (Value::use_iterator UI = G->use_begin(), UE = G->use_end();
- UI != UE;) {
- Value::use_iterator TheIter = UI;
- ++UI;
- CallSite CS(*TheIter);
- if (CS && CS.isCallee(TheIter))
- TheIter.getUse().set(BitcastF);
- }
+ replaceDirectCallers(G, F);
}
- // If G was internal then we may have replaced all uses if G with F. If so,
+ // If G was internal then we may have replaced all uses of G with F. If so,
// stop here and delete G. There's no need for a thunk.
if (G->hasLocalLinkage() && G->use_empty()) {
G->eraseFromParent();
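
The new runOnModule drives everything to a fixpoint: each round swaps Deferred into a local worklist, inserts strong functions before weak ones, and any function invalidated by a merge lands back on Deferred for the next round. A standalone schematic of just the driver loop, with the merge step reduced to a stub:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> Deferred;
      for (int i = 0; i < 4; ++i) Deferred.push_back(i);

      int Rounds = 0;
      do {
        std::vector<int> Worklist;
        Deferred.swap(Worklist); // drain; merging may refill Deferred
        for (size_t i = 0; i != Worklist.size(); ++i) {
          // Stub for insert()/mergeTwoFunctions(): pretend item 2 gets
          // modified in round 0 and must be re-analyzed.
          if (Rounds == 0 && Worklist[i] == 2)
            Deferred.push_back(2);
        }
        ++Rounds;
      } while (!Deferred.empty());

      std::printf("converged after %d rounds\n", Rounds); // prints 2
      return 0;
    }
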
@@ -522,131 +743,126 @@ void MergeFunctions::WriteThunk(Function *F, Function *G) const {
NewG->copyAttributesFrom(G);
NewG->takeName(G);
+ removeUsers(G);
G->replaceAllUsesWith(NewG);
G->eraseFromParent();
+
+ DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
+ ++NumThunksWritten;
}
-/// MergeTwoFunctions - Merge two equivalent functions. Upon completion,
-/// Function G is deleted.
-void MergeFunctions::MergeTwoFunctions(Function *F, Function *G) const {
- if (F->isWeakForLinker()) {
- assert(G->isWeakForLinker());
+// Replace G with an alias to F and delete G.
+void MergeFunctions::writeAlias(Function *F, Function *G) {
+ Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+ GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "",
+ BitcastF, G->getParent());
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+ GA->takeName(G);
+ GA->setVisibility(G->getVisibility());
+ removeUsers(G);
+ G->replaceAllUsesWith(GA);
+ G->eraseFromParent();
+
+ DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
+ ++NumAliasesWritten;
+}
+
+// Merge two equivalent functions. Upon completion, Function G is deleted.
+void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
+ if (F->mayBeOverridden()) {
+ assert(G->mayBeOverridden());
+
+ if (HasGlobalAliases) {
+ // Make them both thunks to the same internal function.
+ Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+ F->getParent());
+ H->copyAttributesFrom(F);
+ H->takeName(F);
+ removeUsers(F);
+ F->replaceAllUsesWith(H);
- // Make them both thunks to the same internal function.
- Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
- F->getParent());
- H->copyAttributesFrom(F);
- H->takeName(F);
- F->replaceAllUsesWith(H);
+ unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
- unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
+ writeAlias(F, G);
+ writeAlias(F, H);
- WriteThunk(F, G);
- WriteThunk(F, H);
+ F->setAlignment(MaxAlignment);
+ F->setLinkage(GlobalValue::PrivateLinkage);
+ } else {
+ // We can't merge them. Instead, pick one and update all direct callers
+ // to call it and hope that we improve the instruction cache hit rate.
+ replaceDirectCallers(G, F);
+ }
- F->setAlignment(MaxAlignment);
- F->setLinkage(GlobalValue::InternalLinkage);
+ ++NumDoubleWeak;
} else {
- WriteThunk(F, G);
+ writeThunkOrAlias(F, G);
}
++NumFunctionsMerged;
}
-static unsigned ProfileFunction(const Function *F) {
- const FunctionType *FTy = F->getFunctionType();
-
- FoldingSetNodeID ID;
- ID.AddInteger(F->size());
- ID.AddInteger(F->getCallingConv());
- ID.AddBoolean(F->hasGC());
- ID.AddBoolean(FTy->isVarArg());
- ID.AddInteger(FTy->getReturnType()->getTypeID());
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- ID.AddInteger(FTy->getParamType(i)->getTypeID());
- return ID.ComputeHash();
-}
-
-class ComparableFunction {
-public:
- ComparableFunction(Function *Func, TargetData *TD)
- : Func(Func), Hash(ProfileFunction(Func)), TD(TD) {}
+// Insert a ComparableFunction into the FnSet, or merge it away if equal to one
+// that was already inserted.
+bool MergeFunctions::insert(ComparableFunction &NewF) {
+ std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
+ if (Result.second) {
+ DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n');
+ return false;
+ }
- AssertingVH<Function> const Func;
- const unsigned Hash;
- TargetData * const TD;
-};
+ const ComparableFunction &OldF = *Result.first;
-struct MergeFunctionsEqualityInfo {
- static ComparableFunction *getEmptyKey() {
- return reinterpret_cast<ComparableFunction*>(0);
- }
- static ComparableFunction *getTombstoneKey() {
- return reinterpret_cast<ComparableFunction*>(-1);
- }
- static unsigned getHashValue(const ComparableFunction *CF) {
- return CF->Hash;
- }
- static bool isEqual(const ComparableFunction *LHS,
- const ComparableFunction *RHS) {
- if (LHS == RHS)
- return true;
- if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
- RHS == getEmptyKey() || RHS == getTombstoneKey())
- return false;
- assert(LHS->TD == RHS->TD && "Comparing functions for different targets");
- return FunctionComparator(LHS->TD, LHS->Func, RHS->Func).Compare();
- }
-};
+ // Never thunk a strong function to a weak function.
+ assert(!OldF.getFunc()->mayBeOverridden() ||
+ NewF.getFunc()->mayBeOverridden());
-bool MergeFunctions::runOnModule(Module &M) {
- typedef DenseSet<ComparableFunction *, MergeFunctionsEqualityInfo> FnSetType;
+ DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == "
+ << NewF.getFunc()->getName() << '\n');
- bool Changed = false;
- TD = getAnalysisIfAvailable<TargetData>();
+ Function *DeleteF = NewF.getFunc();
+ NewF.release();
+ mergeTwoFunctions(OldF.getFunc(), DeleteF);
+ return true;
+}
- std::vector<Function *> Funcs;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage())
- Funcs.push_back(F);
+// Remove a function from FnSet. If it was already in FnSet, add it to Deferred
+// so that we'll look at it in the next round.
+void MergeFunctions::remove(Function *F) {
+ // We need to make sure we remove F, not a function "equal" to F per the
+ // function equality comparator.
+ //
+ // The special "lookup only" ComparableFunction bypasses the expensive
+ // function comparison in favour of a pointer comparison on the underlying
+ // Function*'s.
+ ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly);
+ if (FnSet.erase(CF)) {
+ DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n");
+ Deferred.push_back(F);
}
+}
- bool LocalChanged;
- do {
- LocalChanged = false;
-
- FnSetType FnSet;
- for (unsigned i = 0, e = Funcs.size(); i != e;) {
- Function *F = Funcs[i];
- ComparableFunction *NewF = new ComparableFunction(F, TD);
- std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
- if (!Result.second) {
- ComparableFunction *&OldF = *Result.first;
- assert(OldF && "Expected a hash collision");
-
- // NewF will be deleted in favour of OldF unless NewF is strong and
- // OldF is weak in which case swap them to keep the strong definition.
-
- if (OldF->Func->isWeakForLinker() && !NewF->Func->isWeakForLinker())
- std::swap(OldF, NewF);
-
- DEBUG(dbgs() << " " << OldF->Func->getName() << " == "
- << NewF->Func->getName() << '\n');
-
- Funcs.erase(Funcs.begin() + i);
- --e;
-
- Function *DeleteF = NewF->Func;
- delete NewF;
- MergeTwoFunctions(OldF->Func, DeleteF);
- LocalChanged = true;
- Changed = true;
- } else {
- ++i;
+// For each instruction that uses the value, remove() the function that
+// contains the instruction. This should happen right before a call to RAUW.
+void MergeFunctions::removeUsers(Value *V) {
+ std::vector<Value *> Worklist;
+ Worklist.push_back(V);
+ while (!Worklist.empty()) {
+ Value *V = Worklist.back();
+ Worklist.pop_back();
+
+ for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ Use &U = UI.getUse();
+ if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
+ remove(I->getParent()->getParent());
+ } else if (isa<GlobalValue>(U.getUser())) {
+ // do nothing
+ } else if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+ for (Value::use_iterator CUI = C->use_begin(), CUE = C->use_end();
+ CUI != CUE; ++CUI)
+ Worklist.push_back(*CUI);
}
}
- DeleteContainerPointers(FnSet);
- } while (LocalChanged);
-
- return Changed;
+ }
}
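
removeUsers walks the use graph transitively: an instruction user names a containing function to invalidate, while a constant user (say, a bitcast of G) is pushed back so its own users get visited. A standalone sketch of the walk over a toy user graph:

    #include <cassert>
    #include <map>
    #include <set>
    #include <vector>

    struct Node { bool IsConstant; std::vector<int> Users; };

    int main() {
      // 0 is the value about to be RAUW'd; 1 is a constant (bitcast) of it;
      // 2 and 3 are instructions whose functions must be invalidated.
      std::map<int, Node> Graph;
      Graph[0].IsConstant = true;  Graph[0].Users.push_back(1);
      Graph[1].IsConstant = true;  Graph[1].Users.push_back(2);
      Graph[1].Users.push_back(3);
      Graph[2].IsConstant = false;
      Graph[3].IsConstant = false;

      std::set<int> Invalidated;
      std::vector<int> Worklist(1, 0);
      while (!Worklist.empty()) {
        int V = Worklist.back(); Worklist.pop_back();
        for (size_t i = 0; i != Graph[V].Users.size(); ++i) {
          int U = Graph[V].Users[i];
          if (Graph[U].IsConstant)
            Worklist.push_back(U); // recurse through constant users
          else
            Invalidated.insert(U); // "remove" the containing function
        }
      }
      assert(Invalidated.size() == 2); // both found via the constant
      return 0;
    }
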
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 432f7c5..2afd029 100644
--- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -30,7 +30,9 @@ namespace {
struct PartialInliner : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
static char ID; // Pass identification, replacement for typeid
- PartialInliner() : ModulePass(ID) {}
+ PartialInliner() : ModulePass(ID) {
+ initializePartialInlinerPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module& M);
@@ -41,7 +43,7 @@ namespace {
char PartialInliner::ID = 0;
INITIALIZE_PASS(PartialInliner, "partial-inliner",
- "Partial Inliner", false, false);
+ "Partial Inliner", false, false)
ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
@@ -67,7 +69,7 @@ Function* PartialInliner::unswitchFunction(Function* F) {
return 0;
// Clone the function, so that we can hack away on it.
- ValueMap<const Value*, Value*> VMap;
+ ValueToValueMapTy VMap;
Function* duplicateFunction = CloneFunction(F, VMap,
/*ModuleLevelChanges=*/false);
duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp b/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp
deleted file mode 100644
index 4a99a41..0000000
--- a/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp
+++ /dev/null
@@ -1,216 +0,0 @@
-//===-- PartialSpecialization.cpp - Specialize for common constants--------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass finds function arguments that are often a common constant and
-// specializes a version of the called function for that constant.
-//
-// This pass simply does the cloning for functions it specializes. It depends
-// on IPSCCP and DAE to clean up the results.
-//
-// The initial heuristic favors constant arguments that are used in control
-// flow.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "partialspecialization"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Constant.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/ADT/DenseSet.h"
-#include <map>
-using namespace llvm;
-
-STATISTIC(numSpecialized, "Number of specialized functions created");
-STATISTIC(numReplaced, "Number of callers replaced by specialization");
-
-// Maximum number of arguments markable interested
-static const int MaxInterests = 6;
-
-// Call must be used at least occasionally
-static const int CallsMin = 5;
-
-// Must have 10% of calls having the same constant to specialize on
-static const double ConstValPercent = .1;
-
-namespace {
- typedef SmallVector<int, MaxInterests> InterestingArgVector;
- class PartSpec : public ModulePass {
- void scanForInterest(Function&, InterestingArgVector&);
- int scanDistribution(Function&, int, std::map<Constant*, int>&);
- public :
- static char ID; // Pass identification, replacement for typeid
- PartSpec() : ModulePass(ID) {}
- bool runOnModule(Module &M);
- };
-}
-
-char PartSpec::ID = 0;
-INITIALIZE_PASS(PartSpec, "partialspecialization",
- "Partial Specialization", false, false);
-
-// Specialize F by replacing the arguments (keys) in replacements with the
-// constants (values). Replace all calls to F with those constants with
-// a call to the specialized function. Returns the specialized function
-static Function*
-SpecializeFunction(Function* F,
- ValueMap<const Value*, Value*>& replacements) {
- // arg numbers of deleted arguments
- DenseMap<unsigned, const Argument*> deleted;
- for (ValueMap<const Value*, Value*>::iterator
- repb = replacements.begin(), repe = replacements.end();
- repb != repe; ++repb) {
- Argument const *arg = cast<const Argument>(repb->first);
- deleted[arg->getArgNo()] = arg;
- }
-
- Function* NF = CloneFunction(F, replacements,
- /*ModuleLevelChanges=*/false);
- NF->setLinkage(GlobalValue::InternalLinkage);
- F->getParent()->getFunctionList().push_back(NF);
-
- for (Value::use_iterator ii = F->use_begin(), ee = F->use_end();
- ii != ee; ) {
- Value::use_iterator i = ii;
- ++ii;
- User *U = *i;
- CallSite CS(U);
- if (CS) {
- if (CS.getCalledFunction() == F) {
- SmallVector<Value*, 6> args;
- // Assemble the non-specialized arguments for the updated callsite.
- // In the process, make sure that the specialized arguments are
- // constant and match the specialization. If that's not the case,
- // this callsite needs to call the original or some other
- // specialization; don't change it here.
- CallSite::arg_iterator as = CS.arg_begin(), ae = CS.arg_end();
- for (CallSite::arg_iterator ai = as; ai != ae; ++ai) {
- DenseMap<unsigned, const Argument*>::iterator delit = deleted.find(
- std::distance(as, ai));
- if (delit == deleted.end())
- args.push_back(cast<Value>(ai));
- else {
- Constant *ci = dyn_cast<Constant>(ai);
- if (!(ci && ci == replacements[delit->second]))
- goto next_use;
- }
- }
- Value* NCall;
- if (CallInst *CI = dyn_cast<CallInst>(U)) {
- NCall = CallInst::Create(NF, args.begin(), args.end(),
- CI->getName(), CI);
- cast<CallInst>(NCall)->setTailCall(CI->isTailCall());
- cast<CallInst>(NCall)->setCallingConv(CI->getCallingConv());
- } else {
- InvokeInst *II = cast<InvokeInst>(U);
- NCall = InvokeInst::Create(NF, II->getNormalDest(),
- II->getUnwindDest(),
- args.begin(), args.end(),
- II->getName(), II);
- cast<InvokeInst>(NCall)->setCallingConv(II->getCallingConv());
- }
- CS.getInstruction()->replaceAllUsesWith(NCall);
- CS.getInstruction()->eraseFromParent();
- ++numReplaced;
- }
- }
- next_use:;
- }
- return NF;
-}
-
-
-bool PartSpec::runOnModule(Module &M) {
- bool Changed = false;
- for (Module::iterator I = M.begin(); I != M.end(); ++I) {
- Function &F = *I;
- if (F.isDeclaration() || F.mayBeOverridden()) continue;
- InterestingArgVector interestingArgs;
- scanForInterest(F, interestingArgs);
-
- // Find the first interesting Argument that we can specialize on
- // If there are multiple interesting Arguments, then those will be found
- // when processing the cloned function.
- bool breakOuter = false;
- for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) {
- std::map<Constant*, int> distribution;
- int total = scanDistribution(F, interestingArgs[x], distribution);
- if (total > CallsMin)
- for (std::map<Constant*, int>::iterator ii = distribution.begin(),
- ee = distribution.end(); ii != ee; ++ii)
- if (total > ii->second && ii->first &&
- ii->second > total * ConstValPercent) {
- ValueMap<const Value*, Value*> m;
- Function::arg_iterator arg = F.arg_begin();
- for (int y = 0; y < interestingArgs[x]; ++y)
- ++arg;
- m[&*arg] = ii->first;
- SpecializeFunction(&F, m);
- ++numSpecialized;
- breakOuter = true;
- Changed = true;
- }
- }
- }
- return Changed;
-}
-
-/// scanForInterest - This function decides which arguments would be worth
-/// specializing on.
-void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) {
- for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end();
- ii != ee; ++ii) {
- for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end();
- ui != ue; ++ui) {
-
- bool interesting = false;
- User *U = *ui;
- if (isa<CmpInst>(U)) interesting = true;
- else if (isa<CallInst>(U))
- interesting = ui->getOperand(0) == ii;
- else if (isa<InvokeInst>(U))
- interesting = ui->getOperand(0) == ii;
- else if (isa<SwitchInst>(U)) interesting = true;
- else if (isa<BranchInst>(U)) interesting = true;
-
- if (interesting) {
- args.push_back(std::distance(F.arg_begin(), ii));
- break;
- }
- }
- }
-}
-
-/// scanDistribution - Construct a histogram of constants for arg of F at arg.
-int PartSpec::scanDistribution(Function& F, int arg,
- std::map<Constant*, int>& dist) {
- bool hasIndirect = false;
- int total = 0;
- for (Value::use_iterator ii = F.use_begin(), ee = F.use_end();
- ii != ee; ++ii) {
- User *U = *ii;
- CallSite CS(U);
- if (CS && CS.getCalledFunction() == &F) {
- ++dist[dyn_cast<Constant>(CS.getArgument(arg))];
- ++total;
- } else
- hasIndirect = true;
- }
-
- // Preserve the original address taken function even if all other uses
- // will be specialized.
- if (hasIndirect) ++total;
- return total;
-}
-
-ModulePass* llvm::createPartialSpecializationPass() { return new PartSpec(); }
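
For the record, the heuristic the deleted pass applied is simple to restate: specialize on a constant argument value only when the function has more than CallsMin call sites, the constant shows up at more than ConstValPercent of them, and not at every one of them. A standalone sketch of that decision, with the thresholds copied from the file above:

    #include <cassert>

    // Mirrors the deleted pass's gate: total > CallsMin (5), the constant is
    // seen at more than ConstValPercent (10%) of calls, but not at all calls.
    static bool worthSpecializing(int Total, int WithThisConstant) {
      const int CallsMin = 5;
      const double ConstValPercent = 0.1;
      return Total > CallsMin &&
             WithThisConstant < Total &&
             WithThisConstant > Total * ConstValPercent;
    }

    int main() {
      assert(worthSpecializing(20, 5));    // 25% of 20 calls: specialize
      assert(!worthSpecializing(4, 3));    // too few calls overall
      assert(!worthSpecializing(20, 20));  // every caller: keep the original
      return 0;
    }
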
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index 09ac76f..d91c2c4 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -37,7 +37,9 @@ STATISTIC(NumUnreach, "Number of noreturn calls optimized");
namespace {
struct PruneEH : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- PruneEH() : CallGraphSCCPass(ID) {}
+ PruneEH() : CallGraphSCCPass(ID) {
+ initializePruneEHPass(*PassRegistry::getPassRegistry());
+ }
// runOnSCC - Analyze the SCC, performing the transformation if possible.
bool runOnSCC(CallGraphSCC &SCC);
@@ -48,8 +50,11 @@ namespace {
}
char PruneEH::ID = 0;
-INITIALIZE_PASS(PruneEH, "prune-eh",
- "Remove unused exception handling info", false, false);
+INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false)
Pass *llvm::createPruneEHPass() { return new PruneEH(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
index ee10ad0..b5f09ec 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -29,7 +29,9 @@ namespace {
class StripDeadPrototypesPass : public ModulePass {
public:
static char ID; // Pass identification, replacement for typeid
- StripDeadPrototypesPass() : ModulePass(ID) { }
+ StripDeadPrototypesPass() : ModulePass(ID) {
+ initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
};
@@ -37,7 +39,7 @@ public:
char StripDeadPrototypesPass::ID = 0;
INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
- "Strip Unused Function Prototypes", false, false);
+ "Strip Unused Function Prototypes", false, false)
bool StripDeadPrototypesPass::runOnModule(Module &M) {
bool MadeChange = false;
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 20b7b8f..a690765 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -39,7 +39,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripSymbols(bool ODI = false)
- : ModulePass(ID), OnlyDebugInfo(ODI) {}
+ : ModulePass(ID), OnlyDebugInfo(ODI) {
+ initializeStripSymbolsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
@@ -52,7 +54,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripNonDebugSymbols()
- : ModulePass(ID) {}
+ : ModulePass(ID) {
+ initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
@@ -65,7 +69,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripDebugDeclare()
- : ModulePass(ID) {}
+ : ModulePass(ID) {
+ initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
@@ -78,7 +84,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripDeadDebugInfo()
- : ModulePass(ID) {}
+ : ModulePass(ID) {
+ initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
@@ -90,7 +98,7 @@ namespace {
char StripSymbols::ID = 0;
INITIALIZE_PASS(StripSymbols, "strip",
- "Strip all symbols from a module", false, false);
+ "Strip all symbols from a module", false, false)
ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
return new StripSymbols(OnlyDebugInfo);
@@ -99,7 +107,7 @@ ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
char StripNonDebugSymbols::ID = 0;
INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
"Strip all symbols, except dbg symbols, from a module",
- false, false);
+ false, false)
ModulePass *llvm::createStripNonDebugSymbolsPass() {
return new StripNonDebugSymbols();
@@ -107,7 +115,7 @@ ModulePass *llvm::createStripNonDebugSymbolsPass() {
char StripDebugDeclare::ID = 0;
INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
- "Strip all llvm.dbg.declare intrinsics", false, false);
+ "Strip all llvm.dbg.declare intrinsics", false, false)
ModulePass *llvm::createStripDebugDeclarePass() {
return new StripDebugDeclare();
@@ -115,7 +123,7 @@ ModulePass *llvm::createStripDebugDeclarePass() {
char StripDeadDebugInfo::ID = 0;
INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
- "Strip debug info for unused symbols", false, false);
+ "Strip debug info for unused symbols", false, false)
ModulePass *llvm::createStripDeadDebugInfoPass() {
return new StripDeadDebugInfo();
diff --git a/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp
index b82b03f..584deac 100644
--- a/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -50,7 +50,9 @@ namespace {
virtual bool runOnSCC(CallGraphSCC &SCC);
static char ID; // Pass identification, replacement for typeid
- SRETPromotion() : CallGraphSCCPass(ID) {}
+ SRETPromotion() : CallGraphSCCPass(ID) {
+ initializeSRETPromotionPass(*PassRegistry::getPassRegistry());
+ }
private:
CallGraphNode *PromoteReturn(CallGraphNode *CGN);
@@ -61,8 +63,11 @@ namespace {
}
char SRETPromotion::ID = 0;
-INITIALIZE_PASS(SRETPromotion, "sretpromotion",
- "Promote sret arguments to multiple ret values", false, false);
+INITIALIZE_PASS_BEGIN(SRETPromotion, "sretpromotion",
+ "Promote sret arguments to multiple ret values", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(SRETPromotion, "sretpromotion",
+ "Promote sret arguments to multiple ret values", false, false)
Pass *llvm::createStructRetPromotionPass() {
return new SRETPromotion();
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
index 6f9609c..9c2969c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
@@ -81,7 +81,9 @@ public:
BuilderTy *Builder;
static char ID; // Pass identification, replacement for typeid
- InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {}
+ InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {
+ initializeInstCombinerPass(*PassRegistry::getPassRegistry());
+ }
public:
virtual bool runOnFunction(Function &F);
@@ -143,6 +145,8 @@ public:
ConstantInt *RHS);
Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
ConstantInt *DivRHS);
+ Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS);
Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI,
ICmpInst::Predicate Pred, Value *TheAdd);
Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
@@ -284,9 +288,16 @@ public:
private:
- /// SimplifyCommutative - This performs a few simplifications for
- /// commutative operators.
- bool SimplifyCommutative(BinaryOperator &I);
+ /// SimplifyAssociativeOrCommutative - This performs a few simplifications for
+ /// operators which are associative or commutative.
+ bool SimplifyAssociativeOrCommutative(BinaryOperator &I);
+
+ /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
+ /// which some other binary operation distributes over either by factorizing
+  /// out common terms (e.g. "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if
+  /// this results in simplifications (e.g. "A & (B | C) -> (A&B) | (A&C)" if
+  /// this is a win). Returns the simplified value, or null if it didn't
+  /// simplify.
+ Value *SimplifyUsingDistributiveLaws(BinaryOperator &I);
/// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
/// based on the demanded bits.
@@ -310,10 +321,7 @@ private:
// into the PHI (which is only possible if all operands to the PHI are
// constants).
//
- // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
- // that would normally be unprofitable because they strongly encourage jump
- // threading.
- Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false);
+ Instruction *FoldOpIntoPhi(Instruction &I);
// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
// operator and they all are only used by the PHI, PHI together their
@@ -339,10 +347,6 @@ private:
Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
-
- unsigned GetOrEnforceKnownAlignment(Value *V,
- unsigned PrefAlign = 0);
-
};
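
The headline addition here is SimplifyUsingDistributiveLaws. The factorization it performs is ordinary integer algebra, and it holds bit-for-bit in wrapping unsigned arithmetic; a standalone spot check:

    #include <cassert>
    #include <cstdint>

    // (A*B)+(A*C) == A*(B+C): the identity behind the factorizing direction
    // of SimplifyUsingDistributiveLaws, checked over a small range.
    int main() {
      for (uint32_t A = 0; A < 40; ++A)
        for (uint32_t B = 0; B < 40; ++B)
          for (uint32_t C = 0; C < 40; ++C)
            assert(A * B + A * C == A * (B + C));
      return 0;
    }
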
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 4d2c89e..c36a955 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -84,43 +84,37 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
}
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
I.hasNoUnsignedWrap(), TD))
return ReplaceInstUsesWith(I, V);
-
- if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
- // X + (signbit) --> X ^ signbit
- const APInt& Val = CI->getValue();
- uint32_t BitWidth = Val.getBitWidth();
- if (Val == APInt::getSignBit(BitWidth))
- return BinaryOperator::CreateXor(LHS, RHS);
-
- // See if SimplifyDemandedBits can simplify this. This handles stuff like
- // (X & 254)+1 -> (X&254)|1
- if (SimplifyDemandedInstructionBits(I))
- return &I;
-
- // zext(bool) + C -> bool ? C + 1 : C
- if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
- if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
- return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
- }
+ // (A*B)+(A*C) -> A*(B+C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
- if (isa<PHINode>(LHS))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // X + (signbit) --> X ^ signbit
+ const APInt &Val = CI->getValue();
+ if (Val.isSignBit())
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ // See if SimplifyDemandedBits can simplify this. This handles stuff like
+ // (X & 254)+1 -> (X&254)|1
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // zext(bool) + C -> bool ? C + 1 : C
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
+ if (ZI->getSrcTy()->isIntegerTy(1))
+ return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
- ConstantInt *XorRHS = 0;
- Value *XorLHS = 0;
- if (isa<ConstantInt>(RHSC) &&
- match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
+ Value *XorLHS = 0; ConstantInt *XorRHS = 0;
+ if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
- const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();
+ const APInt &RHSVal = CI->getValue();
unsigned ExtendAmt = 0;
// If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
// If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
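
The X + signbit --> X ^ signbit fold above works because a carry out of the top bit simply falls off the end of the word, so adding the sign bit can only flip it. An exhaustive 8-bit check:

    #include <cassert>
    #include <cstdint>

    // X + 0x80 == X ^ 0x80 for every 8-bit X: adding the sign bit is a pure
    // top-bit flip, since the carry leaves the word.
    int main() {
      for (unsigned X = 0; X < 256; ++X)
        assert(uint8_t(X + 0x80) == uint8_t(X ^ 0x80));
      return 0;
    }
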
@@ -130,13 +124,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
else if (XorRHS->getValue().isPowerOf2())
ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
}
-
+
if (ExtendAmt) {
APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
if (!MaskedValueIsZero(XorLHS, Mask))
ExtendAmt = 0;
}
-
+
if (ExtendAmt) {
Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
@@ -145,34 +139,28 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
}
+ if (isa<Constant>(RHS) && isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
if (I.getType()->isIntegerTy(1))
return BinaryOperator::CreateXor(LHS, RHS);
- if (I.getType()->isIntegerTy()) {
- // X + X --> X << 1
- if (LHS == RHS)
- return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
-
- if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
- if (RHSI->getOpcode() == Instruction::Sub)
- if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B
- return ReplaceInstUsesWith(I, RHSI->getOperand(0));
- }
- if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
- if (LHSI->getOpcode() == Instruction::Sub)
- if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B
- return ReplaceInstUsesWith(I, LHSI->getOperand(0));
- }
+ // X + X --> X << 1
+ if (LHS == RHS) {
+ BinaryOperator *New =
+ BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
+ New->setHasNoSignedWrap(I.hasNoSignedWrap());
+ New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return New;
}
// -A + B --> B - A
// -A + -B --> -(A + B)
if (Value *LHSV = dyn_castNegVal(LHS)) {
- if (LHS->getType()->isIntOrIntVectorTy()) {
- if (Value *RHSV = dyn_castNegVal(RHS)) {
- Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
- return BinaryOperator::CreateNeg(NewAdd);
- }
+ if (Value *RHSV = dyn_castNegVal(RHS)) {
+ Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
+ return BinaryOperator::CreateNeg(NewAdd);
}
return BinaryOperator::CreateSub(RHS, LHSV);
@@ -199,11 +187,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (dyn_castFoldableMul(RHS, C2) == LHS)
return BinaryOperator::CreateMul(LHS, AddOne(C2));
- // X + ~X --> -1 since ~X = -X-1
- if (match(LHS, m_Not(m_Specific(RHS))) ||
- match(RHS, m_Not(m_Specific(LHS))))
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
// A+B --> A|B iff A and B have no bits set in common.
if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
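
The A+B --> A|B fold above is safe because disjoint bit sets generate no carries, so addition and bitwise-or coincide. An exhaustive 8-bit check of the identity under its precondition:

    #include <cassert>
    #include <cstdint>

    // When A & B == 0 there are no carries, hence A + B == A | B.
    int main() {
      for (unsigned A = 0; A < 256; ++A)
        for (unsigned B = 0; B < 256; ++B)
          if ((A & B) == 0)
            assert(uint8_t(A + B) == uint8_t(A | B));
      return 0;
    }
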
@@ -222,7 +205,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
// W*X + Y*Z --> W * (X+Z) iff W == Y
- if (I.getType()->isIntOrIntVectorTy()) {
+ {
Value *W, *X, *Y, *Z;
if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
@@ -251,24 +234,22 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// (X & FF00) + xx00 -> (X+xx00) & FF00
if (LHS->hasOneUse() &&
- match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
- Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
- if (Anded == CRHS) {
- // See if all bits from the first bit set in the Add RHS up are included
- // in the mask. First, get the rightmost bit.
- const APInt &AddRHSV = CRHS->getValue();
-
- // Form a mask of all bits from the lowest bit added through the top.
- APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
-
- // See if the and mask includes all of these bits.
- APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
-
- if (AddRHSHighBits == AddRHSHighBitsAnd) {
- // Okay, the xform is safe. Insert the new add pronto.
- Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
- return BinaryOperator::CreateAnd(NewAdd, C2);
- }
+ match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) &&
+ CRHS->getValue() == (CRHS->getValue() & C2->getValue())) {
+ // See if all bits from the first bit set in the Add RHS up are included
+ // in the mask. First, get the rightmost bit.
+ const APInt &AddRHSV = CRHS->getValue();
+
+ // Form a mask of all bits from the lowest bit added through the top.
+ APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
+
+ // See if the and mask includes all of these bits.
+ APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
+
+ if (AddRHSHighBits == AddRHSHighBitsAnd) {
+ // Okay, the xform is safe. Insert the new add pronto.
+ Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
+ return BinaryOperator::CreateAnd(NewAdd, C2);
}
}
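
The masked-add rewrite keeps the same safety condition as before: the addend must lie inside the mask, and every bit from its lowest set bit upward must be covered by the mask so no carry can escape. With mask 0xFF00 and addend 0x0100 (which satisfies the AddRHSHighBits test), an exhaustive 16-bit check:

    #include <cassert>
    #include <cstdint>

    // (X & 0xFF00) + 0x0100 == (X + 0x0100) & 0xFF00 for all 16-bit X: the
    // addend's low byte is zero, so no carry crosses the mask boundary.
    int main() {
      for (uint32_t X = 0; X <= 0xFFFF; ++X)
        assert(uint16_t((X & 0xFF00) + 0x0100) ==
               uint16_t((X + 0x0100) & 0xFF00));
      return 0;
    }
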
@@ -293,12 +274,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// Can we fold the add into the argument of the select?
// We check both true and false select arguments for a matching subtract.
- if (match(FV, m_Zero()) &&
- match(TV, m_Sub(m_Value(N), m_Specific(A))))
+ if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
// Fold the add into the true select value.
return SelectInst::Create(SI->getCondition(), N, A);
- if (match(TV, m_Zero()) &&
- match(FV, m_Sub(m_Value(N), m_Specific(A))))
+
+ if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
// Fold the add into the false select value.
return SelectInst::Create(SI->getCondition(), A, N);
}
@@ -342,7 +322,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
@@ -424,6 +404,10 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
Value *Result = Constant::getNullValue(IntPtrTy);
+ // If the GEP is inbounds, we know that none of the addressing operations will
+ // overflow in an unsigned sense.
+ bool isInBounds = cast<GEPOperator>(GEP)->isInBounds();
+
// Build a mask for high order bits.
unsigned IntPtrWidth = TD.getPointerSizeInBits();
uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
@@ -439,16 +423,16 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
- Result = Builder->CreateAdd(Result,
- ConstantInt::get(IntPtrTy, Size),
- GEP->getName()+".offs");
+ if (Size)
+ Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size),
+ GEP->getName()+".offs");
continue;
}
Constant *Scale = ConstantInt::get(IntPtrTy, Size);
Constant *OC =
ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
- Scale = ConstantExpr::getMul(OC, Scale);
+ Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/);
// Emit an add instruction.
Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
continue;
@@ -457,9 +441,9 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
if (Op->getType() != IntPtrTy)
Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
if (Size != 1) {
- Constant *Scale = ConstantInt::get(IntPtrTy, Size);
// We'll let instcombine(mul) convert this to a shl if possible.
- Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
+ Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size),
+ GEP->getName()+".idx", isInBounds /*NUW*/);
}
// Emit an add instruction.
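
For reference, a standalone C++ sketch of the offset arithmetic that
EmitGEPOffset materializes, using a hypothetical aggregate type S: the
offset is each index scaled by its element size plus the constant field
offsets, which is exactly what ordinary address arithmetic computes.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    struct S { int32_t a; int64_t b; };              // hypothetical struct type

    int main() {
      S arr[8];
      const size_t i = 3;                            // array index
      uintptr_t Base = reinterpret_cast<uintptr_t>(arr);
      uintptr_t Addr = reinterpret_cast<uintptr_t>(&arr[i].b);
      assert(Addr - Base == i * sizeof(S) + offsetof(S, b));
      return 0;
    }
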
@@ -545,8 +529,13 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Op0 == Op1) // sub X, X -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A*B)-(A*C) -> A*(B-C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
// If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW.
if (Value *V = dyn_castNegVal(Op1)) {
@@ -556,18 +545,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return Res;
}
- if (isa<UndefValue>(Op0))
- return ReplaceInstUsesWith(I, Op0); // undef - X -> undef
- if (isa<UndefValue>(Op1))
- return ReplaceInstUsesWith(I, Op1); // X - undef -> undef
if (I.getType()->isIntegerTy(1))
return BinaryOperator::CreateXor(Op0, Op1);
+
+ // Replace (-1 - A) with (~A).
+ if (match(Op0, m_AllOnes()))
+ return BinaryOperator::CreateNot(Op1);
if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
- // Replace (-1 - A) with (~A).
- if (C->isAllOnesValue())
- return BinaryOperator::CreateNot(Op1);
-
// C - ~X == X + (1+C)
Value *X = 0;
if (match(Op1, m_Not(m_Value(X))))
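
The "C - ~X == X + (1+C)" rewrite follows from two's-complement identities;
a small check on sample values (unsigned arithmetic, so wraparound is well
defined):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      const uint32_t C = 37;                   // illustrative constant
      for (uint32_t X : {0u, 5u, 0xFFFFFFFFu})
        assert(C - ~X == X + (C + 1));         // since ~X == -X - 1
      return 0;
    }
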
@@ -576,29 +561,16 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// -(X >>u 31) -> (X >>s 31)
// -(X >>s 31) -> (X >>u 31)
if (C->isZero()) {
- if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
- if (SI->getOpcode() == Instruction::LShr) {
- if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
- // Check to see if we are shifting out everything but the sign bit.
- if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
- SI->getType()->getPrimitiveSizeInBits()-1) {
- // Ok, the transformation is safe. Insert AShr.
- return BinaryOperator::Create(Instruction::AShr,
- SI->getOperand(0), CU, SI->getName());
- }
- }
- } else if (SI->getOpcode() == Instruction::AShr) {
- if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
- // Check to see if we are shifting out everything but the sign bit.
- if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
- SI->getType()->getPrimitiveSizeInBits()-1) {
- // Ok, the transformation is safe. Insert LShr.
- return BinaryOperator::CreateLShr(
- SI->getOperand(0), CU, SI->getName());
- }
- }
- }
- }
+ Value *X; ConstantInt *CI;
+ if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) &&
+ // Verify we are shifting out everything but the sign bit.
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+ return BinaryOperator::CreateAShr(X, CI);
+
+ if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) &&
+ // Verify we are shifting out everything but the sign bit.
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+ return BinaryOperator::CreateLShr(X, CI);
}
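
A standalone check of the first of the two sign-bit shift rewrites above,
assuming the usual two's-complement target where ">>" on a negative int32_t
is an arithmetic shift (guaranteed from C++20 on):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      for (int32_t X : {7, -5, 0, INT32_MIN}) {
        uint32_t U = static_cast<uint32_t>(X);
        uint32_t Neg = 0u - (U >> 31);                 // 0 - (X >>u 31)
        uint32_t Ash = static_cast<uint32_t>(X >> 31); // X >>s 31
        assert(Neg == Ash);
      }
      return 0;
    }
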
// Try to fold constant sub into select arguments.
@@ -608,86 +580,80 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// C - zext(bool) -> bool ? C - 1 : C
if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
- if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
+ if (ZI->getSrcTy()->isIntegerTy(1))
return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
+
+ // C-(X+C2) --> (C-C2)-X
+ ConstantInt *C2;
+ if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2))))
+ return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
}
- if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
- if (Op1I->getOpcode() == Instruction::Add) {
- if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
- return BinaryOperator::CreateNeg(Op1I->getOperand(1),
- I.getName());
- else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
- return BinaryOperator::CreateNeg(Op1I->getOperand(0),
- I.getName());
- else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
- if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
- // C1-(X+C2) --> (C1-C2)-X
- return BinaryOperator::CreateSub(
- ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));
- }
+
+ { Value *Y;
+ // X-(X+Y) == -Y X-(Y+X) == -Y
+ if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) ||
+ match(Op1, m_Add(m_Value(Y), m_Specific(Op0))))
+ return BinaryOperator::CreateNeg(Y);
+
+ // (X-Y)-X == -Y
+ if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y))))
+ return BinaryOperator::CreateNeg(Y);
+ }
+
+ if (Op1->hasOneUse()) {
+ Value *X = 0, *Y = 0, *Z = 0;
+ Constant *C = 0;
+ ConstantInt *CI = 0;
+
+ // (X - (Y - Z)) --> (X + (Z - Y)).
+ if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
+ return BinaryOperator::CreateAdd(Op0,
+ Builder->CreateSub(Z, Y, Op1->getName()));
+
+ // (X - (X & Y)) --> (X & ~Y)
+ //
+ if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) ||
+ match(Op1, m_And(m_Specific(Op0), m_Value(Y))))
+ return BinaryOperator::CreateAnd(Op0,
+ Builder->CreateNot(Y, Y->getName() + ".not"));
+
+ // 0 - (X sdiv C) -> (X sdiv -C)
+ if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) &&
+ match(Op0, m_Zero()))
+ return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C));
+
+ // 0 - (X << Y) -> (-X << Y) when X is freely negatable.
+ if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero()))
+ if (Value *XNeg = dyn_castNegVal(X))
+ return BinaryOperator::CreateShl(XNeg, Y);
+
+ // X - X*C --> X * (1-C)
+ if (match(Op1, m_Mul(m_Specific(Op0), m_ConstantInt(CI)))) {
+ Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI);
+ return BinaryOperator::CreateMul(Op0, CP1);
}
- if (Op1I->hasOneUse()) {
- // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
- // is not used by anyone else...
- //
- if (Op1I->getOpcode() == Instruction::Sub) {
- // Swap the two operands of the subexpr...
- Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
- Op1I->setOperand(0, IIOp1);
- Op1I->setOperand(1, IIOp0);
-
- // Create the new top level add instruction...
- return BinaryOperator::CreateAdd(Op0, Op1);
- }
-
- // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
- //
- if (Op1I->getOpcode() == Instruction::And &&
- (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
- Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);
-
- Value *NewNot = Builder->CreateNot(OtherOp, "B.not");
- return BinaryOperator::CreateAnd(Op0, NewNot);
- }
-
- // 0 - (X sdiv C) -> (X sdiv -C)
- if (Op1I->getOpcode() == Instruction::SDiv)
- if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
- if (CSI->isZero())
- if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
- return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
- ConstantExpr::getNeg(DivRHS));
-
- // 0 - (C << X) -> (-C << X)
- if (Op1I->getOpcode() == Instruction::Shl)
- if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
- if (CSI->isZero())
- if (Value *ShlLHSNeg = dyn_castNegVal(Op1I->getOperand(0)))
- return BinaryOperator::CreateShl(ShlLHSNeg, Op1I->getOperand(1));
-
- // X - X*C --> X * (1-C)
- ConstantInt *C2 = 0;
- if (dyn_castFoldableMul(Op1I, C2) == Op0) {
- Constant *CP1 =
- ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
- C2);
- return BinaryOperator::CreateMul(Op0, CP1);
- }
+ // X - X<<C --> X * (1-(1<<C))
+ if (match(Op1, m_Shl(m_Specific(Op0), m_ConstantInt(CI)))) {
+ Constant *One = ConstantInt::get(I.getType(), 1);
+ C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI));
+ return BinaryOperator::CreateMul(Op0, C);
}
- }
-
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
- if (Op0I->getOpcode() == Instruction::Add) {
- if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X
- return ReplaceInstUsesWith(I, Op0I->getOperand(1));
- else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X
- return ReplaceInstUsesWith(I, Op0I->getOperand(0));
- } else if (Op0I->getOpcode() == Instruction::Sub) {
- if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y
- return BinaryOperator::CreateNeg(Op0I->getOperand(1),
- I.getName());
+
+ // X - A*-B -> X + A*B
+ // X - -A*B -> X + A*B
+ Value *A, *B;
+ if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) ||
+ match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B))))
+ return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B));
+
+ // X - A*CI -> X + A*-CI
+ // X - CI*A -> X + A*-CI
+ if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) ||
+ match(Op1, m_Mul(m_ConstantInt(CI), m_Value(A)))) {
+ Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI));
+ return BinaryOperator::CreateAdd(Op0, NewMul);
}
}
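
The last two rewrites rely on negating the constant through the multiply;
a quick sanity check under modular (unsigned) arithmetic with arbitrary
sample values:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C = 13;                   // illustrative constant
      uint32_t X = 1000, A = 77;
      assert(X - A * C == X + A * (0u - C));   // X - A*C == X + A*(-C)
      return 0;
    }
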
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 19a05bf..b6b6b84 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -172,7 +172,9 @@ static Value *getFCmpValue(bool isordered, unsigned code,
case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break;
case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
- case 7: return ConstantInt::getTrue(LHS->getContext());
+ case 7:
+ if (!isordered) return ConstantInt::getTrue(LHS->getContext());
+ Pred = FCmpInst::FCMP_ORD; break;
}
return Builder->CreateFCmp(Pred, LHS, RHS);
}
@@ -207,15 +209,26 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
}
break;
case Instruction::Or:
- if (Together == AndRHS) // (X | C) & C --> C
- return ReplaceInstUsesWith(TheAnd, AndRHS);
-
- if (Op->hasOneUse() && Together != OpRHS) {
- // (X | C1) & C2 --> (X | (C1&C2)) & C2
- Value *Or = Builder->CreateOr(X, Together);
- Or->takeName(Op);
- return BinaryOperator::CreateAnd(Or, AndRHS);
+    if (Op->hasOneUse()) {
+ if (Together != OpRHS) {
+ // (X | C1) & C2 --> (X | (C1&C2)) & C2
+ Value *Or = Builder->CreateOr(X, Together);
+ Or->takeName(Op);
+ return BinaryOperator::CreateAnd(Or, AndRHS);
+ }
+
+ ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
+      if (TogetherCI && !TogetherCI->isZero()) {
+ // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
+ // NOTE: This reduces the number of bits set in the & mask, which
+ // can expose opportunities for store narrowing.
+ Together = ConstantExpr::getXor(AndRHS, Together);
+ Value *And = Builder->CreateAnd(X, Together);
+ And->takeName(Op);
+ return BinaryOperator::CreateOr(And, OpRHS);
+ }
}
+
break;
case Instruction::Add:
if (Op->hasOneUse()) {
@@ -261,10 +274,11 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
AndRHS->getValue() & ShlMask);
- if (CI->getValue() == ShlMask) {
- // Masking out bits that the shift already masks
+ if (CI->getValue() == ShlMask)
+ // Masking out bits that the shift already masks.
return ReplaceInstUsesWith(TheAnd, Op); // No need for the and.
- } else if (CI != AndRHS) { // Reducing bits set in and.
+
+ if (CI != AndRHS) { // Reducing bits set in and.
TheAnd.setOperand(1, CI);
return &TheAnd;
}
@@ -281,10 +295,11 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
ConstantInt *CI = ConstantInt::get(Op->getContext(),
AndRHS->getValue() & ShrMask);
- if (CI->getValue() == ShrMask) {
- // Masking out bits that the shift already masks.
+ if (CI->getValue() == ShrMask)
+ // Masking out bits that the shift already masks.
return ReplaceInstUsesWith(TheAnd, Op);
- } else if (CI != AndRHS) {
+
+ if (CI != AndRHS) {
TheAnd.setOperand(1, CI); // Reduce bits set in and cst.
return &TheAnd;
}
@@ -434,6 +449,270 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
}
+/// Enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C).
+/// One of A and B is considered the mask, the other the value. This is
+/// described as the "AMask" or "BMask" part of the enum. If the enum
+/// contains only "Mask", then both A and B can be considered masks.
+/// If A is the mask, then it has been proven that (A & C) == C. This
+/// is trivial if C == A or C == 0. If both A and C are constants, the
+/// proof is also easy.
+/// For the following explanations we assume that A is the mask.
+/// The part "AllOnes" declares that the comparison is true only
+/// if (A & B) == A, i.e. all bits of A are set in B.
+/// Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes
+/// The part "AllZeroes" declares that the comparison is true only
+/// if (A & B) == 0, i.e. all bits of A are cleared in B.
+/// Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes
+/// The part "Mixed" declares that (A & B) == C, where C may contain
+/// any mixture of one bits and zero bits.
+/// Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed
+/// The part "Not" means that in the above descriptions "==" should be
+/// replaced by "!=".
+/// Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes
+/// If the mask A contains a single bit, then the following are equivalent:
+/// (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
+/// (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
+enum MaskedICmpType {
+ FoldMskICmp_AMask_AllOnes = 1,
+ FoldMskICmp_AMask_NotAllOnes = 2,
+ FoldMskICmp_BMask_AllOnes = 4,
+ FoldMskICmp_BMask_NotAllOnes = 8,
+ FoldMskICmp_Mask_AllZeroes = 16,
+ FoldMskICmp_Mask_NotAllZeroes = 32,
+ FoldMskICmp_AMask_Mixed = 64,
+ FoldMskICmp_AMask_NotMixed = 128,
+ FoldMskICmp_BMask_Mixed = 256,
+ FoldMskICmp_BMask_NotMixed = 512
+};
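
The single-bit equivalence noted in the comment above is easy to verify in
isolation: when the mask A is a power of two, (A & B) is either 0 or A, so
equality with A and inequality with 0 coincide.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t A = 0x8;                  // single-bit mask
      for (uint32_t B = 0; B < 64; ++B)
        assert(((A & B) == A) == ((A & B) != 0));
      return 0;
    }
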
+
+/// Return the set of pattern classes (from MaskedICmpType)
+/// that (icmp SCC (A & B), C) satisfies.
+static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
+ ICmpInst::Predicate SCC)
+{
+ ConstantInt *ACst = dyn_cast<ConstantInt>(A);
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
+ bool icmp_abit = (ACst != 0 && !ACst->isZero() &&
+ ACst->getValue().isPowerOf2());
+ bool icmp_bbit = (BCst != 0 && !BCst->isZero() &&
+ BCst->getValue().isPowerOf2());
+ unsigned result = 0;
+ if (CCst != 0 && CCst->isZero()) {
+ // if C is zero, then both A and B qualify as mask
+    result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes |
+                          FoldMskICmp_AMask_Mixed |
+                          FoldMskICmp_BMask_Mixed)
+                       : (FoldMskICmp_Mask_NotAllZeroes |
+                          FoldMskICmp_AMask_NotMixed |
+                          FoldMskICmp_BMask_NotMixed));
+ if (icmp_abit)
+ result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes |
+ FoldMskICmp_AMask_NotMixed)
+ : (FoldMskICmp_AMask_AllOnes |
+ FoldMskICmp_AMask_Mixed));
+ if (icmp_bbit)
+ result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_BMask_NotMixed)
+ : (FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_BMask_Mixed));
+ return result;
+ }
+ if (A == C) {
+ result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes |
+ FoldMskICmp_AMask_Mixed)
+ : (FoldMskICmp_AMask_NotAllOnes |
+ FoldMskICmp_AMask_NotMixed));
+ if (icmp_abit)
+ result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_AMask_NotMixed)
+ : (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_AMask_Mixed));
+ }
+ else if (ACst != 0 && CCst != 0 &&
+ ConstantExpr::getAnd(ACst, CCst) == CCst) {
+ result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
+ : FoldMskICmp_AMask_NotMixed);
+ }
+ if (B == C)
+ {
+ result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_BMask_Mixed)
+ : (FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_BMask_NotMixed));
+ if (icmp_bbit)
+ result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_BMask_NotMixed)
+ : (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_BMask_Mixed));
+ }
+ else if (BCst != 0 && CCst != 0 &&
+ ConstantExpr::getAnd(BCst, CCst) == CCst) {
+ result |= (icmp_eq ? FoldMskICmp_BMask_Mixed
+ : FoldMskICmp_BMask_NotMixed);
+ }
+ return result;
+}
+
+/// foldLogOpOfMaskedICmpsHelper:
+/// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E).
+/// Return the set of pattern classes (from MaskedICmpType)
+/// that both LHS and RHS satisfy.
+static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
+ Value*& B, Value*& C,
+ Value*& D, Value*& E,
+ ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0;
+ if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0;
+ if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0;
+ // vectors are not (yet?) supported
+ if (LHS->getOperand(0)->getType()->isVectorTy()) return 0;
+
+ // Here comes the tricky part:
+ // LHS might be of the form L11 & L12 == X, X == L21 & L22,
+ // and L11 & L12 == L21 & L22. The same goes for RHS.
+  // Now we must find the components L** and R** that are equal, so
+  // that we can extract the parameters A, B, C, D, and E for the
+  // canonical form above.
+ Value *L1 = LHS->getOperand(0);
+ Value *L2 = LHS->getOperand(1);
+ Value *L11,*L12,*L21,*L22;
+ if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+ if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
+ L21 = L22 = 0;
+ }
+ else {
+ if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
+ return 0;
+ std::swap(L1, L2);
+ L21 = L22 = 0;
+ }
+
+ Value *R1 = RHS->getOperand(0);
+ Value *R2 = RHS->getOperand(1);
+ Value *R11,*R12;
+ bool ok = false;
+ if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
+ A = R11; D = R12; E = R2; ok = true;
+ }
+ else
+ if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+ A = R12; D = R11; E = R2; ok = true;
+ }
+ }
+ if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
+ A = R11; D = R12; E = R1; ok = true;
+ }
+ else
+ if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+ A = R12; D = R11; E = R1; ok = true;
+ }
+ else
+ return 0;
+ }
+ if (!ok)
+ return 0;
+
+ if (L11 == A) {
+ B = L12; C = L2;
+ }
+ else if (L12 == A) {
+ B = L11; C = L2;
+ }
+ else if (L21 == A) {
+ B = L22; C = L1;
+ }
+ else if (L22 == A) {
+ B = L21; C = L1;
+ }
+
+ unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC);
+ unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC);
+ return left_type & right_type;
+}
+/// foldLogOpOfMaskedICmps:
+/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// into a single (icmp(A & X) ==/!= Y).
+static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
+ ICmpInst::Predicate NEWCC,
+ llvm::InstCombiner::BuilderTy* Builder) {
+ Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
+ unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS);
+ if (mask == 0) return 0;
+
+ if (NEWCC == ICmpInst::ICMP_NE)
+ mask >>= 1; // treat "Not"-states as normal states
+
+ if (mask & FoldMskICmp_Mask_AllZeroes) {
+ // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
+ // -> (icmp eq (A & (B|D)), 0)
+ Value* newOr = Builder->CreateOr(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newOr);
+    // We can't use C as zero here, because we might actually handle
+    // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+    // with B and D having a single bit set.
+ Value* zero = Constant::getNullValue(A->getType());
+ return Builder->CreateICmp(NEWCC, newAnd, zero);
+ }
+ else if (mask & FoldMskICmp_BMask_AllOnes) {
+ // (icmp eq (A & B), B) & (icmp eq (A & D), D)
+ // -> (icmp eq (A & (B|D)), (B|D))
+ Value* newOr = Builder->CreateOr(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newOr);
+ return Builder->CreateICmp(NEWCC, newAnd, newOr);
+ }
+ else if (mask & FoldMskICmp_AMask_AllOnes) {
+ // (icmp eq (A & B), A) & (icmp eq (A & D), A)
+ // -> (icmp eq (A & (B&D)), A)
+ Value* newAnd1 = Builder->CreateAnd(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newAnd1);
+ return Builder->CreateICmp(NEWCC, newAnd, A);
+ }
+ else if (mask & FoldMskICmp_BMask_Mixed) {
+ // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ // We already know that B & C == C && D & E == E.
+ // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
+ // C and E, which are shared by both the mask B and the mask D, don't
+ // contradict, then we can transform to
+ // -> (icmp eq (A & (B|D)), (C|E))
+ // Currently, we only handle the case of B, C, D, and E being constant.
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ if (BCst == 0) return 0;
+ ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+ if (DCst == 0) return 0;
+    // We can't simply use C and E here, because we might actually handle
+    // (icmp ne (A & B), B) & (icmp eq (A & D), D)
+    // with B and D having a single bit set.
+
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ if (CCst == 0) return 0;
+ if (LHS->getPredicate() != NEWCC)
+ CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) );
+ ConstantInt *ECst = dyn_cast<ConstantInt>(E);
+ if (ECst == 0) return 0;
+ if (RHS->getPredicate() != NEWCC)
+ ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) );
+ ConstantInt* MCst = dyn_cast<ConstantInt>(
+ ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst),
+ ConstantExpr::getXor(CCst, ECst)) );
+    // If there is a conflict, we should actually return false for the
+    // whole construct.
+ if (!MCst->isZero())
+ return 0;
+ Value *newOr1 = Builder->CreateOr(B, D);
+ Value *newOr2 = ConstantExpr::getOr(CCst, ECst);
+ Value *newAnd = Builder->CreateAnd(A, newOr1);
+ return Builder->CreateICmp(NEWCC, newAnd, newOr2);
+ }
+ return 0;
+}
+
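
A small exhaustive check of the first merge performed above, with sample
masks B and D: requiring both (X & B) and (X & D) to be zero is the same
as requiring (X & (B | D)) to be zero.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t B = 0x0C, D = 0x30;       // illustrative masks
      for (uint32_t X = 0; X < 256; ++X)
        assert((((X & B) == 0) && ((X & D) == 0)) == ((X & (B | D)) == 0));
      return 0;
    }
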
/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -451,6 +730,10 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
return getICmpValue(isSigned, Code, Op0, Op1, Builder);
}
}
+
+ // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder))
+ return V;
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
@@ -472,22 +755,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
-
- // (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) -->
- // (icmp eq (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT
- if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
- Value *Op1 = 0, *Op2 = 0;
- ConstantInt *CI1 = 0, *CI2 = 0;
- if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) &&
- match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) {
- if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() &&
- CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) {
- Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
- Value *NewAnd = Builder->CreateAnd(Op1, ConstOr);
- return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr);
- }
- }
- }
}
// From here on, we only handle:
@@ -712,12 +979,16 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Value *V = SimplifyAndInst(Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
+ // (A|B)&(A|C) -> A|(B&C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
@@ -725,7 +996,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
const APInt &AndRHSMask = AndRHS->getValue();
- APInt NotAndRHS(~AndRHSMask);
// Optimize a variety of ((val OP C1) & C2) combinations...
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
@@ -734,10 +1004,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
switch (Op0I->getOpcode()) {
default: break;
case Instruction::Xor:
- case Instruction::Or:
+ case Instruction::Or: {
// If the mask is only needed on one incoming arm, push it up.
if (!Op0I->hasOneUse()) break;
+ APInt NotAndRHS(~AndRHSMask);
if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
// Not masking anything out for the LHS, move to RHS.
Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
@@ -753,6 +1024,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
break;
+ }
case Instruction::Add:
// ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
// ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
@@ -772,14 +1044,12 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
// has 1's for all bits that the subtraction with A might affect.
- if (Op0I->hasOneUse()) {
+ if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) {
uint32_t BitWidth = AndRHSMask.getBitWidth();
uint32_t Zeros = AndRHSMask.countLeadingZeros();
APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
- ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
- if (!(A && A->isZero()) && // avoid infinite recursion.
- MaskedValueIsZero(Op0LHS, Mask)) {
+ if (MaskedValueIsZero(Op0LHS, Mask)) {
Value *NewNeg = Builder->CreateNeg(Op0RHS);
return BinaryOperator::CreateAnd(NewNeg, AndRHS);
}
@@ -797,39 +1067,25 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
break;
}
-
+
if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
return Res;
- } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) {
- // If this is an integer truncation or change from signed-to-unsigned, and
- // if the source is an and/or with immediate, transform it. This
- // frequently occurs for bitfield accesses.
- if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
- if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
- CastOp->getNumOperands() == 2)
- if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){
- if (CastOp->getOpcode() == Instruction::And) {
- // Change: and (cast (and X, C1) to T), C2
- // into : and (cast X to T), trunc_or_bitcast(C1)&C2
- // This will fold the two constants together, which may allow
- // other simplifications.
- Value *NewCast = Builder->CreateTruncOrBitCast(
- CastOp->getOperand(0), I.getType(),
- CastOp->getName()+".shrunk");
- // trunc_or_bitcast(C1)&C2
- Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
- C3 = ConstantExpr::getAnd(C3, AndRHS);
- return BinaryOperator::CreateAnd(NewCast, C3);
- } else if (CastOp->getOpcode() == Instruction::Or) {
- // Change: and (cast (or X, C1) to T), C2
- // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2
- Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
- if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
- // trunc(C1)&C2
- return ReplaceInstUsesWith(I, AndRHS);
- }
- }
+ }
+
+ // If this is an integer truncation, and if the source is an 'and' with
+ // immediate, transform it. This frequently occurs for bitfield accesses.
+ {
+ Value *X = 0; ConstantInt *YC = 0;
+ if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
+ // Change: and (trunc (and X, YC) to T), C2
+ // into : and (trunc X to T), trunc(YC) & C2
+ // This will fold the two constants together, which may allow
+ // other simplifications.
+ Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk");
+ Constant *C3 = ConstantExpr::getTrunc(YC, I.getType());
+ C3 = ConstantExpr::getAnd(C3, AndRHS);
+ return BinaryOperator::CreateAnd(NewCast, C3);
}
}
@@ -851,7 +1107,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
I.getName()+".demorgan");
return BinaryOperator::CreateNot(Or);
}
-
+
{
Value *A = 0, *B = 0, *C = 0, *D = 0;
// (A|B) & ~(A&B) -> A^B
@@ -884,7 +1140,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
cast<BinaryOperator>(Op1)->swapOperands();
std::swap(A, B);
}
- if (A == Op0) // A&(A^B) -> A & ~B
+    // Notice that the pattern (A&(~B)) is actually (A&(-1^B)), so if
+ // A is originally -1 (or a vector of -1 and undefs), then we enter
+ // an endless loop. By checking that A is non-constant we ensure that
+ // we will never get to the loop.
+ if (A == Op0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
}
@@ -1160,7 +1420,12 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
return getICmpValue(isSigned, Code, Op0, Op1, Builder);
}
}
-
+
+ // handle (roughly):
+ // (icmp ne (A & B), C) | (icmp ne (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder))
+ return V;
+
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
@@ -1173,24 +1438,17 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
-
- // (icmp eq (A & C1), 0) | (icmp eq (A & C2), 0) -->
- // (icmp ne (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT
- if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
- Value *Op1 = 0, *Op2 = 0;
- ConstantInt *CI1 = 0, *CI2 = 0;
- if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) &&
- match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) {
- if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() &&
- CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) {
- Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
- Value *NewAnd = Builder->CreateAnd(Op1, ConstOr);
- return Builder->CreateICmp(ICmpInst::ICMP_NE, NewAnd, ConstOr);
- }
- }
- }
}
-
+
+ // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
+ // iff C2 + CA == C1.
+ if (LHSCC == ICmpInst::ICMP_ULT && RHSCC == ICmpInst::ICMP_EQ) {
+ ConstantInt *AddCst;
+ if (match(Val, m_Add(m_Specific(Val2), m_ConstantInt(AddCst))))
+ if (RHSCst->getValue() + AddCst->getValue() == LHSCst->getValue())
+ return Builder->CreateICmpULE(Val, LHSCst);
+ }
+
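
An exhaustive 8-bit check of this range merge, using CA = 1, C1 = 10 and
C2 = 9 so that C2 + CA == C1 as the comment requires: the "X == C2"
disjunct is exactly the boundary case that widens ult into ule.

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 256; ++X) {
        uint8_t Sum = static_cast<uint8_t>(X + 1);   // X + CA, wrapping
        assert(((Sum < 10) || (X == 9)) == (Sum <= 10));
      }
      return 0;
    }
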
// From here on, we only handle:
// (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
if (Val != Val2) return 0;
@@ -1429,12 +1687,16 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
}
Instruction *InstCombiner::visitOr(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Value *V = SimplifyOrInst(Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
+ // (A&B)|(A&C) -> A&(B|C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
@@ -1481,8 +1743,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
if (match(Op0, m_Or(m_Value(), m_Value())) ||
match(Op1, m_Or(m_Value(), m_Value())) ||
- (match(Op0, m_Shift(m_Value(), m_Value())) &&
- match(Op1, m_Shift(m_Value(), m_Value())))) {
+ (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+ match(Op1, m_LogicalShift(m_Value(), m_Value())))) {
if (Instruction *BSwap = MatchBSwap(I))
return BSwap;
}
@@ -1509,7 +1771,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *C = 0, *D = 0;
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
match(Op1, m_And(m_Value(B), m_Value(D)))) {
- Value *V1 = 0, *V2 = 0, *V3 = 0;
+ Value *V1 = 0, *V2 = 0;
C1 = dyn_cast<ConstantInt>(C);
C2 = dyn_cast<ConstantInt>(D);
if (C1 && C2) { // (A & C1)|(B & C2)
@@ -1567,25 +1829,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
}
}
-
- // Check to see if we have any common things being and'ed. If so, find the
- // terms for V1 & (V2|V3).
- if (Op0->hasOneUse() || Op1->hasOneUse()) {
- V1 = 0;
- if (A == B) // (A & C)|(A & D) == A & (C|D)
- V1 = A, V2 = C, V3 = D;
- else if (A == D) // (A & C)|(B & A) == A & (B|C)
- V1 = A, V2 = B, V3 = C;
- else if (C == B) // (A & C)|(C & D) == C & (A|D)
- V1 = C, V2 = A, V3 = D;
- else if (C == D) // (A & C)|(B & C) == C & (A|B)
- V1 = C, V2 = A, V3 = B;
-
- if (V1) {
- Value *Or = Builder->CreateOr(V2, V3, "tmp");
- return BinaryOperator::CreateAnd(V1, Or);
- }
- }
// (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants.
// Don't do this for vector select idioms, the code generator doesn't handle
@@ -1667,65 +1910,69 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// fold (or (cast A), (cast B)) -> (cast (or A, B))
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
- if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
- if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
- const Type *SrcTy = Op0C->getOperand(0)->getType();
- if (SrcTy == Op1C->getOperand(0)->getType() &&
- SrcTy->isIntOrIntVectorTy()) {
- Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
-
- if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
- // Only do this if the casts both really cause code to be
- // generated.
- ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
- ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
- Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
- return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
- }
-
- // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
- // cast is otherwise not optimizable. This happens for vector sexts.
- if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
- if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
- if (Value *Res = FoldOrOfICmps(LHS, RHS))
- return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
-
- // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
- // cast is otherwise not optimizable. This happens for vector sexts.
- if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
- if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
- if (Value *Res = FoldOrOfFCmps(LHS, RHS))
- return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+ CastInst *Op1C = dyn_cast<CastInst>(Op1);
+ if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVectorTy()) {
+ Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+ if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
+ // Only do this if the casts both really cause code to be
+ // generated.
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+ Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
+
+ // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+ if (Value *Res = FoldOrOfICmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+
+ // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+ if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
}
+ }
+ }
+
+ // Note: If we've gotten to the point of visiting the outer OR, then the
+ // inner one couldn't be simplified. If it was a constant, then it won't
+ // be simplified by a later pass either, so we try swapping the inner/outer
+ // ORs in the hopes that we'll be able to simplify it this way.
+ // (X|C) | V --> (X|V) | C
+ if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
+ match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) {
+ Value *Inner = Builder->CreateOr(A, Op1);
+ Inner->takeName(Op0);
+ return BinaryOperator::CreateOr(Inner, C1);
}
return Changed ? &I : 0;
}
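
The inner/outer swap is justified purely by OR being associative and
commutative; a one-line check on arbitrary values:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0x0F0F, V = 0x00FF;         // arbitrary operands
      const uint32_t C = 0xF000;               // the constant being rotated out
      assert(((X | C) | V) == ((X | V) | C));  // (X|C) | V == (X|V) | C
      return 0;
    }
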
Instruction *InstCombiner::visitXor(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (isa<UndefValue>(Op1)) {
- if (isa<UndefValue>(Op0))
- // Handle undef ^ undef -> 0 special case. This is a common
- // idiom (misuse).
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef
- }
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A&B)^(A&C) -> A&(B^C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
- // xor X, X = 0
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
- if (I.getType()->isVectorTy())
- if (isa<ConstantAggregateZero>(Op1))
- return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
// Is this a ~ operation?
if (Value *NotOp = dyn_castNotVal(&I)) {
@@ -1844,15 +2091,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return NV;
}
- if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1
- if (X == Op1)
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
- if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1
- if (X == Op0)
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
-
BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
if (Op1I) {
Value *A, *B;
@@ -1865,10 +2103,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
I.swapOperands(); // Simplified below.
std::swap(Op0, Op1);
}
- } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) {
- return ReplaceInstUsesWith(I, B); // A^(A^B) == B
- } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
- return ReplaceInstUsesWith(I, A); // A^(B^A) == B
} else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
Op1I->hasOneUse()){
if (A == Op0) { // A^(A&B) -> A^(B&A)
@@ -1891,10 +2125,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
std::swap(A, B);
if (B == Op1) // (A|B)^B == A & ~B
return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
- } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
- return ReplaceInstUsesWith(I, B); // (A^B)^A == B
- } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
- return ReplaceInstUsesWith(I, A); // (B^A)^A == B
} else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
Op0I->hasOneUse()){
if (A == Op1) // (A&B)^A -> (B&A)^A
@@ -1932,29 +2162,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if ((A == C && B == D) || (A == D && B == C))
return BinaryOperator::CreateXor(A, B);
}
-
- // (A & B)^(C & D)
- if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
- match(Op0I, m_And(m_Value(A), m_Value(B))) &&
- match(Op1I, m_And(m_Value(C), m_Value(D)))) {
- // (X & Y)^(X & Y) -> (Y^Z) & X
- Value *X = 0, *Y = 0, *Z = 0;
- if (A == C)
- X = A, Y = B, Z = D;
- else if (A == D)
- X = A, Y = B, Z = C;
- else if (B == C)
- X = B, Y = A, Z = D;
- else if (B == D)
- X = B, Y = A, Z = C;
-
- if (X) {
- Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
- return BinaryOperator::CreateAnd(NewOp, X);
- }
- }
}
-
+
// (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
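
The special case deleted above is an instance of AND distributing over XOR,
which the newly added SimplifyUsingDistributiveLaws call handles
generically; a quick check on sample bit patterns:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0xF0F0, Y = 0xFF00, Z = 0x0FF0;  // illustrative values
      assert(((X & Y) ^ (X & Z)) == ((Y ^ Z) & X)); // (X&Y)^(X&Z) == (Y^Z)&X
      return 0;
    }
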
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0ebe3b4..8449f7b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -17,6 +17,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
/// getPromotedType - Return the specified type promoted as it would be to pass
@@ -29,100 +30,10 @@ static const Type *getPromotedType(const Type *Ty) {
return Ty;
}
-/// EnforceKnownAlignment - If the specified pointer points to an object that
-/// we control, modify the object's alignment to PrefAlign. This isn't
-/// often possible though. If alignment is important, a more reliable approach
-/// is to simply align all global variables and allocation instructions to
-/// their preferred alignment from the beginning.
-///
-static unsigned EnforceKnownAlignment(Value *V,
- unsigned Align, unsigned PrefAlign) {
-
- User *U = dyn_cast<User>(V);
- if (!U) return Align;
-
- switch (Operator::getOpcode(U)) {
- default: break;
- case Instruction::BitCast:
- return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
- case Instruction::GetElementPtr: {
- // If all indexes are zero, it is just the alignment of the base pointer.
- bool AllZeroOperands = true;
- for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
- if (!isa<Constant>(*i) ||
- !cast<Constant>(*i)->isNullValue()) {
- AllZeroOperands = false;
- break;
- }
-
- if (AllZeroOperands) {
- // Treat this like a bitcast.
- return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
- }
- return Align;
- }
- case Instruction::Alloca: {
- AllocaInst *AI = cast<AllocaInst>(V);
- // If there is a requested alignment and if this is an alloca, round up.
- if (AI->getAlignment() >= PrefAlign)
- return AI->getAlignment();
- AI->setAlignment(PrefAlign);
- return PrefAlign;
- }
- }
-
- if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- // If there is a large requested alignment and we can, bump up the alignment
- // of the global.
- if (GV->isDeclaration()) return Align;
-
- if (GV->getAlignment() >= PrefAlign)
- return GV->getAlignment();
- // We can only increase the alignment of the global if it has no alignment
- // specified or if it is not assigned a section. If it is assigned a
- // section, the global could be densely packed with other objects in the
- // section, increasing the alignment could cause padding issues.
- if (!GV->hasSection() || GV->getAlignment() == 0)
- GV->setAlignment(PrefAlign);
- return GV->getAlignment();
- }
-
- return Align;
-}
-
-/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
-/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
-/// and it is more than the alignment of the ultimate object, see if we can
-/// increase the alignment of the ultimate object, making this check succeed.
-unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
- unsigned PrefAlign) {
- assert(V->getType()->isPointerTy() &&
- "GetOrEnforceKnownAlignment expects a pointer!");
- unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
- APInt Mask = APInt::getAllOnesValue(BitWidth);
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
- unsigned TrailZ = KnownZero.countTrailingOnes();
-
- // Avoid trouble with rediculously large TrailZ values, such as
- // those computed from a null pointer.
- TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
-
- unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
-
- // LLVM doesn't support alignments larger than this currently.
- Align = std::min(Align, +Value::MaximumAlignment);
-
- if (PrefAlign > Align)
- Align = EnforceKnownAlignment(V, Align, PrefAlign);
-
- // We don't need to make any adjustment.
- return Align;
-}
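
The core of the deleted helper (the patch now calls getKnownAlignment via
the new Transforms/Utils/Local.h include) derives alignment from trailing
zero bits; a sketch on a hypothetical address, with C++20's
std::countr_zero standing in for the pass's known-bits computation:

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Addr = 0x1040;                        // hypothetical address
      unsigned Align = 1u << std::countr_zero(Addr); // 2^(trailing zero bits)
      assert(Align == 64 && Addr % Align == 0);
      return 0;
    }
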
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(0));
- unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(1));
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD);
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
@@ -211,7 +122,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
}
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
- unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
+ unsigned Alignment = getKnownAlignment(MI->getDest(), TD);
if (MI->getAlignment() < Alignment) {
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
Alignment, false));
@@ -234,7 +145,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
Value *Dest = MI->getDest();
- Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));
+ unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
+ Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
+ Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);
// Alignment 0 is identity for alignment 1 for memset, but not store.
if (Alignment == 0) Alignment = 1;
@@ -280,7 +193,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// memmove/cpy/set of zero bytes is a noop.
if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
- if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
+ if (NumBytes->isNullValue())
+ return EraseInstFromFunction(CI);
if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
if (CI->getZExtValue() == 1) {
@@ -289,6 +203,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// alignment is sufficient.
}
}
+
+ // No other transformations apply to volatile transfers.
+ if (MI->isVolatile())
+ return 0;
// If we have a memmove and the source operation is a constant global,
// then the source and dest pointers can't alias, so we can change this
@@ -332,82 +250,73 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (!TD) break;
const Type *ReturnTy = CI.getType();
- bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
+ uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL;
// Get to the real allocated thing and offset as fast as possible.
Value *Op1 = II->getArgOperand(0)->stripPointerCasts();
-
+
+ uint64_t Offset = 0;
+ uint64_t Size = -1ULL;
+
+ // Try to look through constant GEPs.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) {
+ if (!GEP->hasAllConstantIndices()) break;
+
+ // Get the current byte offset into the thing. Use the original
+ // operand in case we're looking through a bitcast.
+ SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
+ Offset = TD->getIndexedOffset(GEP->getPointerOperandType(),
+ Ops.data(), Ops.size());
+
+ Op1 = GEP->getPointerOperand()->stripPointerCasts();
+
+ // Make sure we're not a constant offset from an external
+ // global.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1))
+ if (!GV->hasDefinitiveInitializer()) break;
+ }
+
// If we've stripped down to a single global variable that we
// can know the size of then just return that.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) {
if (GV->hasDefinitiveInitializer()) {
Constant *C = GV->getInitializer();
- uint64_t GlobalSize = TD->getTypeAllocSize(C->getType());
- return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, GlobalSize));
+ Size = TD->getTypeAllocSize(C->getType());
} else {
// Can't determine size of the GV.
- Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+ Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow);
return ReplaceInstUsesWith(CI, RetVal);
}
} else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
// Get alloca size.
if (AI->getAllocatedType()->isSized()) {
- uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
+ Size = TD->getTypeAllocSize(AI->getAllocatedType());
if (AI->isArrayAllocation()) {
const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize());
if (!C) break;
- AllocaSize *= C->getZExtValue();
+ Size *= C->getZExtValue();
}
- return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, AllocaSize));
}
} else if (CallInst *MI = extractMallocCall(Op1)) {
+ // Get allocation size.
const Type* MallocType = getMallocAllocatedType(MI);
- // Get alloca size.
- if (MallocType && MallocType->isSized()) {
- if (Value *NElems = getMallocArraySize(MI, TD, true)) {
+ if (MallocType && MallocType->isSized())
+ if (Value *NElems = getMallocArraySize(MI, TD, true))
if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
- return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy,
- (NElements->getZExtValue() * TD->getTypeAllocSize(MallocType))));
- }
- }
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) {
- // Only handle constant GEPs here.
- if (CE->getOpcode() != Instruction::GetElementPtr) break;
- GEPOperator *GEP = cast<GEPOperator>(CE);
-
- // Make sure we're not a constant offset from an external
- // global.
- Value *Operand = GEP->getPointerOperand();
- Operand = Operand->stripPointerCasts();
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand))
- if (!GV->hasDefinitiveInitializer()) break;
-
- // Get what we're pointing to and its size.
- const PointerType *BaseType =
- cast<PointerType>(Operand->getType());
- uint64_t Size = TD->getTypeAllocSize(BaseType->getElementType());
-
- // Get the current byte offset into the thing. Use the original
- // operand in case we're looking through a bitcast.
- SmallVector<Value*, 8> Ops(CE->op_begin()+1, CE->op_end());
- const PointerType *OffsetType =
- cast<PointerType>(GEP->getPointerOperand()->getType());
- uint64_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size());
-
- if (Size < Offset) {
- // Out of bound reference? Negative index normalized to large
- // index? Just return "I don't know".
- Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
- return ReplaceInstUsesWith(CI, RetVal);
- }
-
- Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset);
- return ReplaceInstUsesWith(CI, RetVal);
- }
+ Size = NElements->getZExtValue() * TD->getTypeAllocSize(MallocType);
+ }
// Do not return "I don't know" here. Later optimization passes could
// make it possible to evaluate objectsize to a constant.
- break;
+ if (Size == -1ULL)
+ break;
+
+ if (Size < Offset) {
+ // Out of bound reference? Negative index normalized to large
+ // index? Just return "I don't know".
+ return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow));
+ }
+ return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset));
}
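
The reworked objectsize lowering boils down to "known allocation size minus
known constant offset, or the don't-know value"; a trivial standalone
illustration with a hypothetical 16-byte global:

    #include <cassert>
    #include <cstddef>

    static char Buf[16];                       // stands in for a sized global

    int main() {
      const size_t Offset = 5;                 // constant GEP offset
      assert(sizeof(Buf) - Offset == 11);      // objectsize(Buf + 5) folds to 11
      return 0;
    }
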
case Intrinsic::bswap:
// bswap(bswap(x)) -> x
@@ -604,7 +513,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_loadu_dq:
// Turn PPC lvx -> load if the pointer is known aligned.
// Turn X86 loadups -> load if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
return new LoadInst(Ptr);
@@ -613,7 +522,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getArgOperand(1), 16) >= 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
const Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
@@ -624,16 +533,23 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
const Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(1)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
return new StoreInst(II->getArgOperand(1), Ptr);
}
break;
-
- case Intrinsic::x86_sse_cvttss2si: {
- // These intrinsics only demands the 0th element of its input vector. If
+
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64: {
+ // These intrinsics only demand the 0th element of their input vectors. If
// we can simplify the input based on that, do so now.
unsigned VWidth =
cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
@@ -646,7 +562,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
}
-
+
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
@@ -697,6 +613,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD);
+ unsigned AlignArg = II->getNumArgOperands() - 1;
+ ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
+ if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
+ II->setArgOperand(AlignArg,
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ MemAlign, false));
+ return II;
+ }
+ break;
+ }
+
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
@@ -783,6 +725,8 @@ protected:
NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
}
bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+ if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
+ return true;
if (ConstantInt *SizeCI =
dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
if (SizeCI->isAllOnesValue())
@@ -819,11 +763,11 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
Instruction *InstCombiner::visitCallSite(CallSite CS) {
bool Changed = false;
- // If the callee is a constexpr cast of a function, attempt to move the cast
- // to the arguments of the call/invoke.
- if (transformConstExprCastCall(CS)) return 0;
-
+ // If the callee is a pointer to a function, attempt to move any casts to the
+ // arguments of the call/invoke.
Value *Callee = CS.getCalledValue();
+ if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
+ return 0;
if (Function *CalleeF = dyn_cast<Function>(Callee))
// If the call and callee calling conventions don't match, this call must
@@ -917,12 +861,10 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// attempt to move the cast to the arguments of the call/invoke.
//
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
- if (!isa<ConstantExpr>(CS.getCalledValue())) return false;
- ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue());
- if (CE->getOpcode() != Instruction::BitCast ||
- !isa<Function>(CE->getOperand(0)))
+ Function *Callee =
+ dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (Callee == 0)
return false;
- Function *Callee = cast<Function>(CE->getOperand(0));
Instruction *Caller = CS.getInstruction();
const AttrListPtr &CallerPAL = CS.getAttributes();
@@ -984,9 +926,22 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!CastInst::isCastable(ActTy, ParamTy))
return false; // Cannot transform this parameter value.
- if (CallerPAL.getParamAttributes(i + 1)
- & Attribute::typeIncompatible(ParamTy))
+ unsigned Attrs = CallerPAL.getParamAttributes(i + 1);
+ if (Attrs & Attribute::typeIncompatible(ParamTy))
return false; // Attribute not compatible with transformed value.
+
+ // If the parameter is passed as a byval argument, then we have to have a
+ // sized type and the sized type has to have the same size as the old type.
+ if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
+ const PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
+ if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
+ return false;
+
+ const Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
+ if (TD->getTypeAllocSize(CurElTy) !=
+ TD->getTypeAllocSize(ParamPTy->getElementType()))
+ return false;
+ }
// Converting from one pointer type to another or between a pointer and an
// integer of the same size is safe even if we do not have a body.
@@ -1109,8 +1064,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Value *NV = NC;
if (OldRetTy != NV->getType() && !Caller->use_empty()) {
if (!NV->getType()->isVoidTy()) {
- Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
- OldRetTy, false);
+ Instruction::CastOps opcode =
+ CastInst::getCastOpcode(NC, false, OldRetTy, false);
NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
// If this is an invoke instruction, we should insert it after the first
@@ -1119,7 +1074,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
InsertNewInstBefore(NC, *I);
} else {
- // Otherwise, it's a call, just insert cast right after the call instr
+ // Otherwise, it's a call, just insert cast right after the call.
InsertNewInstBefore(NC, *Caller);
}
Worklist.AddUsersToWorkList(*Caller);
@@ -1128,7 +1083,6 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
}
-
if (!Caller->use_empty())
Caller->replaceAllUsesWith(NV);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 79a9b09..b432641 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -462,8 +462,8 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
Value *A = 0; ConstantInt *Cst = 0;
- if (match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst))) &&
- Src->hasOneUse()) {
+ if (Src->hasOneUse() &&
+ match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) {
// We have three types to worry about here, the type of A, the source of
// the truncate (MidSize), and the destination of the truncate. We know that
// ASize < MidSize and MidSize > ResultSize, but don't know the relation
@@ -482,6 +482,16 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Shift->takeName(Src);
return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
}
+
+  // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
+  // type is native.
+ if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
+ ShouldChangeType(Src->getType(), CI.getType()) &&
+ match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) {
+ Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr");
+ return BinaryOperator::CreateAnd(NewTrunc,
+ ConstantExpr::getTrunc(Cst, CI.getType()));
+ }
return 0;
}
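
The new fold relies on truncation distributing over bitwise AND. A minimal standalone C++ sketch (illustrative only, not part of the patch) checking the identity exhaustively for an i16 -> i8 truncate:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint16_t Cst = 0x1234;                   // arbitrary constant
      for (unsigned X = 0; X <= 0xFFFF; ++X) {
        uint8_t A = (uint8_t)(X & Cst);              // trunc (and X, cst)
        uint8_t B = (uint8_t)X & (uint8_t)Cst;       // and (trunc X), (trunc cst)
        if (A != B) { puts("mismatch"); return 1; }
      }
      puts("trunc(and X, cst) == and(trunc X, trunc cst) for all i16 X");
      return 0;
    }
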
@@ -1019,8 +1029,22 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
}
}
}
-
-
+
+ // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed.
+ if (const VectorType *VTy = dyn_cast<VectorType>(DestTy)) {
+ ICmpInst::Predicate Pred; Value *CmpLHS;
+ if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_Zero()))) {
+ if (Pred == ICmpInst::ICMP_SLT && CmpLHS->getType() == DestTy) {
+ const Type *EltTy = VTy->getElementType();
+
+ // splat the shift constant to a constant vector.
+ Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1);
+ Value *In = Builder->CreateAShr(CmpLHS, VSh,CmpLHS->getName()+".lobit");
+ return ReplaceInstUsesWith(CI, In);
+ }
+ }
+ }
+
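
The scalar version of the identity this vector fold uses: sign-extending (x <s 0) gives the same result as an arithmetic shift right by bit-width minus one. A minimal C++ sketch (not part of the patch), assuming arithmetic right shift on signed values, which is what ashr performs and what C++20 guarantees:

    #include <cstdint>
    #include <cstdio>

    int main() {
      for (int V = -128; V <= 127; ++V) {
        int8_t X   = (int8_t)V;
        int8_t Sel = X < 0 ? -1 : 0;        // sext (icmp slt X, 0)
        int8_t Shr = (int8_t)(X >> 7);      // ashr X, 7   (bits - 1)
        if (Sel != Shr) { puts("mismatch"); return 1; }
      }
      puts("(X <s 0) ? -1 : 0  ==  ashr X, 7  for all i8 X");
      return 0;
    }
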
// If the input is a shl/ashr pair of a same constant, then this is a sign
// extension from a smaller value. If we could trust arbitrary bitwidth
// integers, we could turn this into a truncate to the smaller bit and then
@@ -1363,8 +1387,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
ConstantInt::get(Int32Ty, SrcElts));
}
- Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size());
- return new ShuffleVectorInst(InVal, V2, Mask);
+ return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask));
}
static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index d7e2b72..999de34 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -22,13 +22,17 @@
using namespace llvm;
using namespace PatternMatch;
+static ConstantInt *getOne(Constant *C) {
+ return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
+}
+
/// AddOne - Add one to a ConstantInt
static Constant *AddOne(Constant *C) {
return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
}
/// SubOne - Subtract one from a ConstantInt
-static Constant *SubOne(ConstantInt *C) {
- return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
+static Constant *SubOne(Constant *C) {
+ return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
}
static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
@@ -160,8 +164,8 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
Max = KnownOne|UnknownBits;
if (UnknownBits.isNegative()) { // Sign bit is unknown
- Min.set(Min.getBitWidth()-1);
- Max.clear(Max.getBitWidth()-1);
+ Min.setBit(Min.getBitWidth()-1);
+ Max.clearBit(Max.getBitWidth()-1);
}
}
@@ -694,13 +698,6 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
if (Pred == ICmpInst::ICMP_NE)
return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
- // If this is an instruction (as opposed to constantexpr) get NUW/NSW info.
- bool isNUW = false, isNSW = false;
- if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) {
- isNUW = Add->hasNoUnsignedWrap();
- isNSW = Add->hasNoSignedWrap();
- }
-
// From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
// so the values can never be equal. Similarly for all other "or equals"
// operators.
@@ -709,10 +706,6 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// (X+2) <u X --> X >u (MAXUINT-2) --> X > 253
// (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
- // If this is an NUW add, then this is always false.
- if (isNUW)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
-
Value *R =
ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
@@ -721,12 +714,8 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// (X+1) >u X --> X <u (0-1) --> X != 255
// (X+2) >u X --> X <u (0-2) --> X <u 254
// (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
- if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
- // If this is an NUW add, then this is always true.
- if (isNUW)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
- }
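
The rewrite table above can be checked mechanically; a minimal C++ sketch (illustrative, not part of the patch) verifying the (X+C) >u X --> X <u -C transform over all i8 values:

    #include <cstdint>
    #include <cstdio>

    int main() {
      for (unsigned C = 1; C <= 255; ++C)            // C != 0 holds here
        for (unsigned X = 0; X <= 255; ++X) {
          bool LHS = (uint8_t)(X + C) > (uint8_t)X;  // (X+C) >u X
          bool RHS = (uint8_t)X < (uint8_t)(0 - C);  // X <u (0-C)
          if (LHS != RHS) { puts("mismatch"); return 1; }
        }
      puts("(X+C) >u X  <=>  X <u -C  for all i8 X and nonzero C");
      return 0;
    }
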
unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
ConstantInt *SMax = ConstantInt::get(X->getContext(),
@@ -738,16 +727,8 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1
// (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
// (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
- if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
- // If this is an NSW add, then we have two cases: if the constant is
- // positive, then this is always false, if negative, this is always true.
- if (isNSW) {
- bool isTrue = CI->getValue().isNegative();
- return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
- }
-
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
- }
// (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
// (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
@@ -756,13 +737,6 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126
// (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
- // If this is an NSW add, then we have two cases: if the constant is
- // positive, then this is always true, if negative, this is always false.
- if (isNSW) {
- bool isTrue = !CI->getValue().isNegative();
- return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
- }
-
assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
@@ -782,7 +756,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
// (x /u C1) <u C2. Simply casting the operands and result won't
// work. :( The if statement below tests that condition and bails
- // if it finds it.
+ // if it finds it.
bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
return 0;
@@ -790,9 +764,11 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
return 0; // The ProdOV computation fails on divide by zero.
if (DivIsSigned && DivRHS->isAllOnesValue())
return 0; // The overflow computation also screws up here
- if (DivRHS->isOne())
- return 0; // Not worth bothering, and eliminates some funny cases
- // with INT_MIN.
+ if (DivRHS->isOne()) {
+ // This eliminates some funny cases with INT_MIN.
+ ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X.
+ return &ICI;
+ }
// Compute Prod = CI * DivRHS. We are essentially solving an equation
// of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
@@ -809,6 +785,10 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// Get the ICmp opcode
ICmpInst::Predicate Pred = ICI.getPredicate();
+  // If the division is known to be exact, then there is no remainder from the
+  // divide, so the covered range size is one; otherwise it is the divisor.
+ ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS;
+
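
Concretely, for the running X/5 example: a minimal C++ sketch (not part of the patch) showing how exactness shrinks the interval the compare covers:

    #include <cstdio>

    int main() {
      // Plain udiv: X /u 5 == 3 holds exactly on the interval [15, 20).
      for (unsigned X = 0; X < 40; ++X)
        if ((X / 5 == 3) != (X >= 15 && X < 20)) { puts("bad"); return 1; }
      // Exact udiv: the remainder is zero by assumption, so only X == 15
      // qualifies -- the covered range collapses to a single value.
      for (unsigned X = 0; X < 40; X += 5)
        if ((X / 5 == 3) != (X == 15)) { puts("bad"); return 1; }
      puts("range is [Prod, Prod+Divisor) normally, [Prod, Prod+1) if exact");
      return 0;
    }
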
// Figure out the interval that is being checked. For example, a comparison
// like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
// Compute this interval based on the constants involved and the signedness of
@@ -818,38 +798,43 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
int LoOverflow = 0, HiOverflow = 0;
Constant *LoBound = 0, *HiBound = 0;
-
+
if (!DivIsSigned) { // udiv
// e.g. X/5 op 3 --> [15, 20)
LoBound = Prod;
HiOverflow = LoOverflow = ProdOV;
- if (!HiOverflow)
- HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false);
+ if (!HiOverflow) {
+ // If this is not an exact divide, then many values in the range collapse
+ // to the same result value.
+ HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
+ }
+
} else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
if (CmpRHSV == 0) { // (X / pos) op 0
// Can't overflow. e.g. X/2 op 0 --> [-1, 2)
- LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
- HiBound = DivRHS;
+ LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
+ HiBound = RangeSize;
} else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
HiOverflow = LoOverflow = ProdOV;
if (!HiOverflow)
- HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true);
+ HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true);
} else { // (X / pos) op neg
// e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
HiBound = AddOne(Prod);
LoOverflow = HiOverflow = ProdOV ? -1 : 0;
if (!LoOverflow) {
- ConstantInt* DivNeg =
- cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+ ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
- }
+ }
}
} else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
+ if (DivI->isExact())
+ RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
if (CmpRHSV == 0) { // (X / neg) op 0
// e.g. X/-5 op 0 --> [-4, 5)
- LoBound = AddOne(DivRHS);
- HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+ LoBound = AddOne(RangeSize);
+ HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
if (HiBound == DivRHS) { // -INTMIN = INTMIN
HiOverflow = 1; // [INTMIN+1, overflow)
HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN
@@ -859,12 +844,12 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
HiBound = AddOne(Prod);
HiOverflow = LoOverflow = ProdOV ? -1 : 0;
if (!LoOverflow)
- LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0;
+ LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
} else { // (X / neg) op neg
LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
LoOverflow = HiOverflow = ProdOV;
if (!HiOverflow)
- HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true);
+ HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true);
}
// Dividing by a negative swaps the condition. LT <-> GT
@@ -883,9 +868,8 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
if (LoOverflow)
return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
ICmpInst::ICMP_ULT, X, HiBound);
- return ReplaceInstUsesWith(ICI,
- InsertRangeTest(X, LoBound, HiBound, DivIsSigned,
- true));
+ return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+ DivIsSigned, true));
case ICmpInst::ICMP_NE:
if (LoOverflow && HiOverflow)
return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
@@ -908,13 +892,100 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
case ICmpInst::ICMP_SGT:
if (HiOverflow == +1) // High bound greater than input range.
return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
- else if (HiOverflow == -1) // High bound less than input range.
+ if (HiOverflow == -1) // High bound less than input range.
return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
if (Pred == ICmpInst::ICMP_UGT)
return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
- else
- return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+ }
+}
+
+/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)".
+Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
+ ConstantInt *ShAmt) {
+ const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue();
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ uint32_t TypeBits = CmpRHSV.getBitWidth();
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ if (ShAmtVal >= TypeBits || ShAmtVal == 0)
+ return 0;
+
+ if (!ICI.isEquality()) {
+ // If we have an unsigned comparison and an ashr, we can't simplify this.
+ // Similarly for signed comparisons with lshr.
+ if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
+ return 0;
+
+ // Otherwise, all lshr and all exact ashr's are equivalent to a udiv/sdiv by
+ // a power of 2. Since we already have logic to simplify these, transform
+ // to div and then simplify the resultant comparison.
+ if (Shr->getOpcode() == Instruction::AShr &&
+ !Shr->isExact())
+ return 0;
+
+ // Revisit the shift (to delete it).
+ Worklist.Add(Shr);
+
+ Constant *DivCst =
+ ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
+
+ Value *Tmp =
+ Shr->getOpcode() == Instruction::AShr ?
+ Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) :
+ Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact());
+
+ ICI.setOperand(0, Tmp);
+
+ // If the builder folded the binop, just return it.
+ BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
+ if (TheDiv == 0)
+ return &ICI;
+
+ // Otherwise, fold this div/compare.
+ assert(TheDiv->getOpcode() == Instruction::SDiv ||
+ TheDiv->getOpcode() == Instruction::UDiv);
+
+ Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst));
+ assert(Res && "This div/cst should have folded!");
+ return Res;
+ }
+
+
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ APInt Comp = CmpRHSV << ShAmtVal;
+ ConstantInt *ShiftedCmpRHS = ConstantInt::get(ICI.getContext(), Comp);
+ if (Shr->getOpcode() == Instruction::LShr)
+ Comp = Comp.lshr(ShAmtVal);
+ else
+ Comp = Comp.ashr(ShAmtVal);
+
+ if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ // Otherwise, check to see if the bits shifted out are known to be zero.
+ // If so, we can compare against the unshifted value:
+ // (X & 4) >> 1 == 2 --> (X & 4) == 4.
+ if (Shr->hasOneUse() && Shr->isExact())
+ return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS);
+
+ if (Shr->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+ Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
+
+ Value *And = Builder->CreateAnd(Shr->getOperand(0),
+ Mask, Shr->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
}
+ return 0;
}
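
The div rewrite this function performs is the usual shift/divide correspondence; a minimal C++ sketch (illustrative, assuming arithmetic signed >>) of why exactness is required on the ashr side:

    #include <cstdio>

    int main() {
      for (unsigned X = 0; X < 4096; ++X)          // lshr k == udiv 2^k, always
        if ((X >> 3) != X / 8) { puts("bad"); return 1; }
      for (int X = -4096; X < 4096; X += 8)        // exact ashr k == sdiv 2^k
        if ((X >> 3) != X / 8) { puts("bad"); return 1; }
      // Without exactness the two differ: -1 ashr 3 is -1, but -1 sdiv 8 is 0.
      puts("lshr k == udiv 2^k; ashr k == sdiv 2^k when no bits shift out");
      return 0;
    }
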
@@ -939,8 +1010,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// If all the high bits are known, we can do this xform.
if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
// Pull in the high bits from known-ones set.
- APInt NewRHS(RHS->getValue());
- NewRHS.zext(SrcBits);
+ APInt NewRHS = RHS->getValue().zext(SrcBits);
NewRHS |= KnownOne;
return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
ConstantInt::get(ICI.getContext(), NewRHS));
@@ -1022,10 +1092,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
(AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) {
uint32_t BitWidth =
cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
- APInt NewCST = AndCST->getValue();
- NewCST.zext(BitWidth);
- APInt NewCI = RHSV;
- NewCI.zext(BitWidth);
+ APInt NewCST = AndCST->getValue().zext(BitWidth);
+ APInt NewCI = RHSV.zext(BitWidth);
Value *NewAnd =
Builder->CreateAnd(Cast->getOperand(0),
ConstantInt::get(ICI.getContext(), NewCST),
@@ -1145,7 +1213,6 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
// Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
// -> and (icmp eq P, null), (icmp eq Q, null).
-
Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P,
Constant::getNullValue(P->getType()));
Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q,
@@ -1185,6 +1252,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return ReplaceInstUsesWith(ICI, Cst);
}
+ // If the shift is NUW, then it is just shifting out zeros, no need for an
+ // AND.
+ if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap())
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getLShr(RHS, ShAmt));
+
if (LHSI->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
@@ -1195,8 +1268,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
Value *And =
Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
return new ICmpInst(ICI.getPredicate(), And,
- ConstantInt::get(ICI.getContext(),
- RHSV.lshr(ShAmtVal)));
+ ConstantExpr::getLShr(RHS, ShAmt));
}
}
@@ -1205,8 +1277,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (LHSI->hasOneUse() &&
isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
// (X << 31) <s 0 --> (X&1) != 0
- Constant *Mask = ConstantInt::get(ICI.getContext(), APInt(TypeBits, 1) <<
- (TypeBits-ShAmt->getZExtValue()-1));
+ Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(),
+ APInt::getOneBitSet(TypeBits,
+ TypeBits-ShAmt->getZExtValue()-1));
Value *And =
Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
@@ -1216,57 +1289,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
- case Instruction::AShr: {
+ case Instruction::AShr:
// Only handle equality comparisons of shift-by-constant.
- ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
- if (!ShAmt || !ICI.isEquality()) break;
-
- // Check that the shift amount is in range. If not, don't perform
- // undefined shifts. When the shift is visited it will be
- // simplified.
- uint32_t TypeBits = RHSV.getBitWidth();
- if (ShAmt->uge(TypeBits))
- break;
-
- uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
-
- // If we are comparing against bits always shifted out, the
- // comparison cannot succeed.
- APInt Comp = RHSV << ShAmtVal;
- if (LHSI->getOpcode() == Instruction::LShr)
- Comp = Comp.lshr(ShAmtVal);
- else
- Comp = Comp.ashr(ShAmtVal);
-
- if (Comp != RHSV) { // Comparing against a bit that we know is zero.
- bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
- Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
- IsICMP_NE);
- return ReplaceInstUsesWith(ICI, Cst);
- }
-
- // Otherwise, check to see if the bits shifted out are known to be zero.
- // If so, we can compare against the unshifted value:
- // (X & 4) >> 1 == 2 --> (X & 4) == 4.
- if (LHSI->hasOneUse() &&
- MaskedValueIsZero(LHSI->getOperand(0),
- APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
- return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
- ConstantExpr::getShl(RHS, ShAmt));
- }
-
- if (LHSI->hasOneUse()) {
- // Otherwise strength reduce the shift into an and.
- APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
- Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
-
- Value *And = Builder->CreateAnd(LHSI->getOperand(0),
- Mask, LHSI->getName()+".mask");
- return new ICmpInst(ICI.getPredicate(), And,
- ConstantExpr::getShl(RHS, ShAmt));
- }
+ if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
+ if (Instruction *Res = FoldICmpShrCst(ICI, cast<BinaryOperator>(LHSI),
+ ShAmt))
+ return Res;
break;
- }
case Instruction::SDiv:
case Instruction::UDiv:
@@ -1543,50 +1572,174 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// The re-extended constant changed so the constant cannot be represented
// in the shorter type. Consequently, we cannot emit a simple comparison.
+ // All the cases that fold to true or false will have already been handled
+ // by SimplifyICmpInst, so only deal with the tricky case.
- // First, handle some easy cases. We know the result cannot be equal at this
- // point so handle the ICI.isEquality() cases
- if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
- if (ICI.getPredicate() == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (isSignedCmp || !isSignedExt)
+ return 0;
// Evaluate the comparison for LT (we invert for GT below). LE and GE cases
// should have been folded away previously and not enter in here.
- Value *Result;
- if (isSignedCmp) {
- // We're performing a signed comparison.
- if (cast<ConstantInt>(CI)->getValue().isNegative())
- Result = ConstantInt::getFalse(ICI.getContext()); // X < (small) --> false
- else
- Result = ConstantInt::getTrue(ICI.getContext()); // X < (large) --> true
- } else {
- // We're performing an unsigned comparison.
- if (isSignedExt) {
- // We're performing an unsigned comp with a sign extended value.
- // This is true if the input is >= 0. [aka >s -1]
- Constant *NegOne = Constant::getAllOnesValue(SrcTy);
- Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
- } else {
- // Unsigned extend & unsigned compare -> always true.
- Result = ConstantInt::getTrue(ICI.getContext());
- }
- }
+
+ // We're performing an unsigned comp with a sign extended value.
+ // This is true if the input is >= 0. [aka >s -1]
+ Constant *NegOne = Constant::getAllOnesValue(SrcTy);
+ Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
// Finally, return the value computed.
- if (ICI.getPredicate() == ICmpInst::ICMP_ULT ||
- ICI.getPredicate() == ICmpInst::ICMP_SLT)
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT)
return ReplaceInstUsesWith(ICI, Result);
- assert((ICI.getPredicate()==ICmpInst::ICMP_UGT ||
- ICI.getPredicate()==ICmpInst::ICMP_SGT) &&
- "ICmp should be folded!");
- if (Constant *CI = dyn_cast<Constant>(Result))
- return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
+ assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
return BinaryOperator::CreateNot(Result);
}
+/// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form:
+/// I = icmp ugt (add (add A, B), CI2), CI1
+/// If this is of the form:
+/// sum = a + b
+/// if (sum+128 >u 255)
+/// Then replace it with llvm.sadd.with.overflow.i8.
+///
+static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
+ ConstantInt *CI2, ConstantInt *CI1,
+ InstCombiner &IC) {
+ // The transformation we're trying to do here is to transform this into an
+ // llvm.sadd.with.overflow. To do this, we have to replace the original add
+ // with a narrower add, and discard the add-with-constant that is part of the
+ // range check (if we can't eliminate it, this isn't profitable).
+
+ // In order to eliminate the add-with-constant, the compare can be its only
+ // use.
+ Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
+ if (!AddWithCst->hasOneUse()) return 0;
+
+ // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
+ if (!CI2->getValue().isPowerOf2()) return 0;
+ unsigned NewWidth = CI2->getValue().countTrailingZeros();
+ if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0;
+
+ // The width of the new add formed is 1 more than the bias.
+ ++NewWidth;
+
+ // Check to see that CI1 is an all-ones value with NewWidth bits.
+ if (CI1->getBitWidth() == NewWidth ||
+ CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
+ return 0;
+
+ // In order to replace the original add with a narrower
+ // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
+ // and truncates that discard the high bits of the add. Verify that this is
+ // the case.
+ Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
+ for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end();
+ UI != E; ++UI) {
+ if (*UI == AddWithCst) continue;
+
+    // Only accept truncates for now.  We would really like a nice recursive
+    // predicate like SimplifyDemandedBits, but one that goes down the use-def
+ // chain to see which bits of a value are actually demanded. If the
+ // original add had another add which was then immediately truncated, we
+ // could still do the transformation.
+ TruncInst *TI = dyn_cast<TruncInst>(*UI);
+ if (TI == 0 ||
+ TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0;
+ }
+
+ // If the pattern matches, truncate the inputs to the narrower type and
+ // use the sadd_with_overflow intrinsic to efficiently compute both the
+ // result and the overflow bit.
+ Module *M = I.getParent()->getParent()->getParent();
+
+ const Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
+ &NewType, 1);
+
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+
+ // Put the new code above the original add, in case there are any uses of the
+ // add between the add and the compare.
+ Builder->SetInsertPoint(OrigAdd);
+
+ Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc");
+ Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc");
+ CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd");
+ Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
+ Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
+
+ // The inner add was the result of the narrow add, zero extended to the
+ // wider type. Replace it with the result computed by the intrinsic.
+ IC.ReplaceInstUsesWith(*OrigAdd, ZExt);
+
+ // The original icmp gets replaced with the overflow value.
+ return ExtractValueInst::Create(Call, 1, "sadd.overflow");
+}
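
A minimal C++ sketch (illustrative, not part of the patch) confirming that the matched range check is exactly the i8 signed-overflow condition the intrinsic computes:

    #include <cstdio>

    int main() {
      for (int A = -128; A <= 127; ++A)
        for (int B = -128; B <= 127; ++B) {
          int Sum = A + B;                            // the wide add
          bool Idiom = (unsigned)(Sum + 128) > 255u;  // sum+128 >u 255
          bool Ovfl  = Sum < -128 || Sum > 127;       // true i8 sadd overflow
          if (Idiom != Ovfl) { puts("mismatch"); return 1; }
        }
      puts("sum+128 >u 255 is exactly the i8 signed-add-overflow test");
      return 0;
    }
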
+
+static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
+ InstCombiner &IC) {
+ // Don't bother doing this transformation for pointers, don't do it for
+ // vectors.
+ if (!isa<IntegerType>(OrigAddV->getType())) return 0;
+
+ // If the add is a constant expr, then we don't bother transforming it.
+ Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
+ if (OrigAdd == 0) return 0;
+
+ Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
+
+ // Put the new code above the original add, in case there are any uses of the
+ // add between the add and the compare.
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+ Builder->SetInsertPoint(OrigAdd);
+
+ Module *M = I.getParent()->getParent()->getParent();
+ const Type *Ty = LHS->getType();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, &Ty,1);
+ CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
+ Value *Add = Builder->CreateExtractValue(Call, 0);
+ IC.ReplaceInstUsesWith(*OrigAdd, Add);
+
+ // The original icmp gets replaced with the overflow value.
+ return ExtractValueInst::Create(Call, 1, "uadd.overflow");
+}
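
The pattern this helper implements rests on a standard fact: an unsigned add wraps exactly when the truncated sum is smaller than either operand. A minimal C++ sketch (illustrative only):

    #include <cstdint>
    #include <cstdio>

    int main() {
      for (unsigned A = 0; A <= 255; ++A)
        for (unsigned B = 0; B <= 255; ++B) {
          uint8_t Sum = (uint8_t)(A + B);
          bool Idiom = Sum < (uint8_t)A;    // (a+b) <u a
          bool Ovfl  = A + B > 255;         // true i8 uadd overflow
          if (Idiom != Ovfl) { puts("mismatch"); return 1; }
        }
      puts("(a+b) <u a is exactly the i8 unsigned-add-overflow flag");
      return 0;
    }
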
+
+// DemandedBitsLHSMask - When performing a comparison against a constant,
+// it is possible that not all the bits in the LHS are demanded. This helper
+// method computes the mask that IS demanded.
+static APInt DemandedBitsLHSMask(ICmpInst &I,
+ unsigned BitWidth, bool isSignCheck) {
+ if (isSignCheck)
+ return APInt::getSignBit(BitWidth);
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
+ if (!CI) return APInt::getAllOnesValue(BitWidth);
+ const APInt &RHS = CI->getValue();
+
+ switch (I.getPredicate()) {
+ // For a UGT comparison, we don't care about any bits that
+ // correspond to the trailing ones of the comparand. The value of these
+ // bits doesn't impact the outcome of the comparison, because any value
+ // greater than the RHS must differ in a bit higher than these due to carry.
+ case ICmpInst::ICMP_UGT: {
+ unsigned trailingOnes = RHS.countTrailingOnes();
+ APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes);
+ return ~lowBitsSet;
+ }
+
+ // Similarly, for a ULT comparison, we don't care about the trailing zeros.
+ // Any value less than the RHS must differ in a higher bit because of carries.
+ case ICmpInst::ICMP_ULT: {
+ unsigned trailingZeros = RHS.countTrailingZeros();
+ APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros);
+ return ~lowBitsSet;
+ }
+
+ default:
+ return APInt::getAllOnesValue(BitWidth);
+ }
+
+}
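
The UGT case of this mask can be checked exhaustively; a minimal C++ sketch (illustrative, not part of the patch) flipping every undemanded bit and confirming the compare never changes:

    #include <cstdio>

    int main() {
      for (unsigned R = 0; R <= 255; ++R) {
        unsigned Ones = 0;                      // count R's trailing ones
        while (Ones < 8 && (R & (1u << Ones))) ++Ones;
        unsigned Mask = (1u << Ones) - 1;       // the undemanded low bits
        for (unsigned X = 0; X <= 255; ++X)
          for (unsigned M = 0; M <= Mask; ++M)  // every low-bit flip pattern
            if (((X ^ M) > R) != (X > R)) { puts("demanded!"); return 1; }
      }
      puts("trailing-one bits of a UGT comparand are never demanded");
      return 0;
    }
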
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
@@ -1649,17 +1802,37 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
unsigned BitWidth = 0;
- if (TD)
- BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
- else if (Ty->isIntOrIntVectorTy())
+ if (Ty->isIntOrIntVectorTy())
BitWidth = Ty->getScalarSizeInBits();
-
+ else if (TD) // Pointers require TD info to get their size.
+ BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+
bool isSignBit = false;
// See if we are doing a comparison with a constant.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
Value *A = 0, *B = 0;
+ // Match the following pattern, which is a common idiom when writing
+    // overflow-safe integer arithmetic functions.  The source performs an
+    // addition in a wider type, and explicitly checks for overflow using
+ // comparisons against INT_MIN and INT_MAX. Simplify this by using the
+ // sadd_with_overflow intrinsic.
+ //
+ // TODO: This could probably be generalized to handle other overflow-safe
+ // operations if we worked out the formulas to compute the appropriate
+ // magic constants.
+ //
+ // sum = a + b
+ // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
+ {
+ ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
+ if (I.getPredicate() == ICmpInst::ICMP_UGT &&
+ match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
+ if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this))
+ return Res;
+ }
+
// (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
if (I.isEquality() && CI->isZero() &&
match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
@@ -1704,8 +1877,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
if (SimplifyDemandedBits(I.getOperandUse(0),
- isSignBit ? APInt::getSignBit(BitWidth)
- : APInt::getAllOnesValue(BitWidth),
+ DemandedBitsLHSMask(I, BitWidth, isSignBit),
Op0KnownZero, Op0KnownOne, 0))
return &I;
if (SimplifyDemandedBits(I.getOperandUse(1),
@@ -1744,14 +1916,80 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// simplify this comparison. For example, (x&4) < 8 is always true.
switch (I.getPredicate()) {
default: llvm_unreachable("Unknown icmp opcode!");
- case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_EQ: {
if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+
+ // If all bits are known zero except for one, then we know at most one
+ // bit is set. If the comparison is against zero, then this is a check
+ // to see if *that* bit is set.
+ APInt Op0KnownZeroInverted = ~Op0KnownZero;
+ if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
+ // If the LHS is an AND with the same constant, look through it.
+ Value *LHS = 0;
+ ConstantInt *LHSC = 0;
+ if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
+ LHSC->getValue() != Op0KnownZeroInverted)
+ LHS = Op0;
+
+ // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
+ // then turn "((1 << x)&8) == 0" into "x != 3".
+ Value *X = 0;
+ if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+ unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ }
+
+ // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
+ // then turn "((8 >>u x)&1) == 0" into "x != 3".
+ const APInt *CI;
+ if (Op0KnownZeroInverted == 1 &&
+ match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(X->getType(),
+ CI->countTrailingZeros()));
+ }
+
break;
- case ICmpInst::ICMP_NE:
+ }
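
The shift-based rewrites in this case (and its ICMP_NE twin below) are easy to sanity-check, since the single known-set bit pins down the shift amount. A minimal standalone C++ sketch (illustrative only):

    #include <cstdio>

    int main() {
      for (unsigned X = 0; X < 8; ++X) {
        bool LHS = ((1u << X) & 8u) == 0;     // ((1 << x) & 8) == 0
        bool RHS = X != 3;                    // x != 3
        if (LHS != RHS) { puts("mismatch"); return 1; }
        bool L2 = ((8u >> X) & 1u) != 0;      // ((8 >>u x) & 1) != 0
        bool R2 = X == 3;                     // x == 3
        if (L2 != R2) { puts("mismatch"); return 1; }
      }
      puts("((1 << x) & 8) == 0 <=> x != 3;  ((8 >>u x) & 1) != 0 <=> x == 3");
      return 0;
    }
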
+ case ICmpInst::ICMP_NE: {
if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+
+ // If all bits are known zero except for one, then we know at most one
+ // bit is set. If the comparison is against zero, then this is a check
+ // to see if *that* bit is set.
+ APInt Op0KnownZeroInverted = ~Op0KnownZero;
+ if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
+ // If the LHS is an AND with the same constant, look through it.
+ Value *LHS = 0;
+ ConstantInt *LHSC = 0;
+ if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
+ LHSC->getValue() != Op0KnownZeroInverted)
+ LHS = Op0;
+
+ // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
+ // then turn "((1 << x)&8) != 0" into "x == 3".
+ Value *X = 0;
+ if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+ unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ }
+
+ // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
+ // then turn "((8 >>u x)&1) != 0" into "x == 3".
+ const APInt *CI;
+ if (Op0KnownZeroInverted == 1 &&
+ match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(X->getType(),
+ CI->countTrailingZeros()));
+ }
+
break;
+ }
case ICmpInst::ICMP_ULT:
if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
@@ -1894,7 +2132,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// block. If in the same block, we're encouraging jump threading. If
// not, we are just pessimizing the code by making an i1 phi.
if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = FoldOpIntoPhi(I, true))
+ if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
break;
case Instruction::Select: {
@@ -1995,79 +2233,163 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Instruction *R = visitICmpInstWithCastAndCast(I))
return R;
}
-
- // See if it's the same type of instruction on the left and right.
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
- if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
- if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() &&
- Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) {
- switch (Op0I->getOpcode()) {
- default: break;
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Xor:
- if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
- return new ICmpInst(I.getPredicate(), Op0I->getOperand(0),
- Op1I->getOperand(0));
- // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
- if (CI->getValue().isSignBit()) {
- ICmpInst::Predicate Pred = I.isSigned()
- ? I.getUnsignedPredicate()
- : I.getSignedPredicate();
- return new ICmpInst(Pred, Op0I->getOperand(0),
- Op1I->getOperand(0));
- }
-
- if (CI->getValue().isMaxSignedValue()) {
- ICmpInst::Predicate Pred = I.isSigned()
- ? I.getUnsignedPredicate()
- : I.getSignedPredicate();
- Pred = I.getSwappedPredicate(Pred);
- return new ICmpInst(Pred, Op0I->getOperand(0),
- Op1I->getOperand(0));
- }
+
+ // Special logic for binary operators.
+ BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
+ BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
+ if (BO0 || BO1) {
+ CmpInst::Predicate Pred = I.getPredicate();
+ bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
+ if (BO0 && isa<OverflowingBinaryOperator>(BO0))
+ NoOp0WrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
+ if (BO1 && isa<OverflowingBinaryOperator>(BO1))
+ NoOp1WrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
+
+ // Analyze the case when either Op0 or Op1 is an add instruction.
+ // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ if (BO0 && BO0->getOpcode() == Instruction::Add)
+ A = BO0->getOperand(0), B = BO0->getOperand(1);
+ if (BO1 && BO1->getOpcode() == Instruction::Add)
+ C = BO1->getOperand(0), D = BO1->getOperand(1);
+
+ // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+ if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
+ return new ICmpInst(Pred, A == Op1 ? B : A,
+ Constant::getNullValue(Op1->getType()));
+
+ // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+ if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
+ return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
+ C == Op0 ? D : C);
+
+ // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
+ if (A && C && (A == C || A == D || B == C || B == D) &&
+ NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse()) {
+ // Determine Y and Z in the form icmp (X+Y), (X+Z).
+ Value *Y = (A == C || A == D) ? B : A;
+ Value *Z = (C == A || C == B) ? D : C;
+ return new ICmpInst(Pred, Y, Z);
+ }
+
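
For the equality forms no wrap flags are needed at all, since adding X is invertible modulo 2^n. A minimal C++ sketch (illustrative, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    int main() {
      for (unsigned X = 0; X <= 255; ++X)
        for (unsigned Y = 0; Y <= 255; ++Y)
          for (unsigned Z = 0; Z <= 255; ++Z) {
            bool LHS = (uint8_t)(X + Y) == (uint8_t)(X + Z);
            if (LHS != (Y == Z)) { puts("mismatch"); return 1; }
          }
      puts("icmp eq (X+Y), (X+Z)  ==  icmp eq Y, Z  with no nsw/nuw needed");
      return 0;
    }
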
+ // Analyze the case when either Op0 or Op1 is a sub instruction.
+ // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
+ A = 0; B = 0; C = 0; D = 0;
+ if (BO0 && BO0->getOpcode() == Instruction::Sub)
+ A = BO0->getOperand(0), B = BO0->getOperand(1);
+ if (BO1 && BO1->getOpcode() == Instruction::Sub)
+ C = BO1->getOperand(0), D = BO1->getOperand(1);
+
+ // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
+ if (A == Op1 && NoOp0WrapProblem)
+ return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
+
+ // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
+ if (C == Op0 && NoOp1WrapProblem)
+ return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
+
+ // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
+ if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse())
+ return new ICmpInst(Pred, A, C);
+
+ // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
+ if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse())
+ return new ICmpInst(Pred, D, B);
+
+ if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() &&
+ BO0->hasOneUse() && BO1->hasOneUse() &&
+ BO0->getOperand(1) == BO1->getOperand(1)) {
+ switch (BO0->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+ if (CI->getValue().isSignBit()) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ return new ICmpInst(Pred, BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
+
+ if (CI->getValue().isMaxSignedValue()) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ Pred = I.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, BO0->getOperand(0),
+ BO1->getOperand(0));
}
+ }
+ break;
+ case Instruction::Mul:
+ if (!I.isEquality())
break;
- case Instruction::Mul:
- if (!I.isEquality())
- break;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
- // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
- // Mask = -1 >> count-trailing-zeros(Cst).
- if (!CI->isZero() && !CI->isOne()) {
- const APInt &AP = CI->getValue();
- ConstantInt *Mask = ConstantInt::get(I.getContext(),
- APInt::getLowBitsSet(AP.getBitWidth(),
- AP.getBitWidth() -
- AP.countTrailingZeros()));
- Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask);
- Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask);
- return new ICmpInst(I.getPredicate(), And1, And2);
- }
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+ // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+ // Mask = -1 >> count-trailing-zeros(Cst).
+ if (!CI->isZero() && !CI->isOne()) {
+ const APInt &AP = CI->getValue();
+ ConstantInt *Mask = ConstantInt::get(I.getContext(),
+ APInt::getLowBitsSet(AP.getBitWidth(),
+ AP.getBitWidth() -
+ AP.countTrailingZeros()));
+ Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
+ Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
+ return new ICmpInst(I.getPredicate(), And1, And2);
}
- break;
}
+ break;
}
}
}
- // ~x < ~y --> y < x
{ Value *A, *B;
- if (match(Op0, m_Not(m_Value(A))) &&
- match(Op1, m_Not(m_Value(B))))
- return new ICmpInst(I.getPredicate(), B, A);
+ // ~x < ~y --> y < x
+ // ~x < cst --> ~cst < x
+ if (match(Op0, m_Not(m_Value(A)))) {
+ if (match(Op1, m_Not(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B, A);
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
+ return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
+ }
+
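
Bitwise NOT reverses both the signed and unsigned orders, which is all these two folds need. A minimal C++ sketch for the unsigned case (illustrative only):

    #include <cstdint>
    #include <cstdio>

    int main() {
      for (unsigned X = 0; X <= 255; ++X)
        for (unsigned C = 0; C <= 255; ++C) {
          bool LHS = (uint8_t)~X < (uint8_t)C;    // ~x <u cst
          bool RHS = (uint8_t)~C < (uint8_t)X;    // ~cst <u x
          if (LHS != RHS) { puts("mismatch"); return 1; }
        }
      puts("~x <u cst  <=>  ~cst <u x  on all i8 pairs");
      return 0;
    }
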
+ // (a+b) <u a --> llvm.uadd.with.overflow.
+ // (a+b) <u b --> llvm.uadd.with.overflow.
+ if (I.getPredicate() == ICmpInst::ICMP_ULT &&
+ match(Op0, m_Add(m_Value(A), m_Value(B))) &&
+ (Op1 == A || Op1 == B))
+ if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
+ return R;
+
+ // a >u (a+b) --> llvm.uadd.with.overflow.
+ // b >u (a+b) --> llvm.uadd.with.overflow.
+ if (I.getPredicate() == ICmpInst::ICMP_UGT &&
+ match(Op1, m_Add(m_Value(A), m_Value(B))) &&
+ (Op0 == A || Op0 == B))
+ if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
+ return R;
}
if (I.isEquality()) {
Value *A, *B, *C, *D;
-
- // -x == -y --> x == y
- if (match(Op0, m_Neg(m_Value(A))) &&
- match(Op1, m_Neg(m_Value(B))))
- return new ICmpInst(I.getPredicate(), A, B);
-
+
if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
Value *OtherVal = A == Op1 ? B : A;
@@ -2102,16 +2424,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Constant::getNullValue(A->getType()));
}
- // (A-B) == A -> B == 0
- if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))
- return new ICmpInst(I.getPredicate(), B,
- Constant::getNullValue(B->getType()));
-
- // A == (A-B) -> B == 0
- if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))
- return new ICmpInst(I.getPredicate(), B,
- Constant::getNullValue(B->getType()));
-
// (X&Z) == (Y&Z) -> (X^Y) & Z == 0
if (Op0->hasOneUse() && Op1->hasOneUse() &&
match(Op0, m_And(m_Value(A), m_Value(B))) &&
@@ -2397,7 +2709,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
// block. If in the same block, we're encouraging jump threading. If
// not, we are just pessimizing the code by making an i1 phi.
if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = FoldOpIntoPhi(I, true))
+ if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
break;
case Instruction::SIToFP:
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index b68fbc2..78ff734 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -145,7 +145,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// Attempt to improve the alignment.
if (TD) {
unsigned KnownAlign =
- GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
+ getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD);
unsigned LoadAlign = LI.getAlignment();
unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
TD->getABITypeAlignment(LI.getType());
@@ -165,7 +165,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (LI.isVolatile()) return 0;
// Do really simple store-to-load forwarding and load CSE, to catch cases
- // where there are several consequtive memory accesses to the same location,
+ // where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
BasicBlock::iterator BBI = &LI;
if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
@@ -330,7 +330,9 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
SIOp0->getName()+".c");
- return new StoreInst(NewCast, CastOp);
+ SI.setOperand(0, NewCast);
+ SI.setOperand(1, CastOp);
+ return &SI;
}
/// equivalentAddressValues - Test if A and B will obviously have the same
@@ -414,7 +416,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// Attempt to improve the alignment.
if (TD) {
unsigned KnownAlign =
- GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
+ getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()),
+ TD);
unsigned StoreAlign = SI.getAlignment();
unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
TD->getABITypeAlignment(Val->getType());
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index b3974e8..d1a1fd6 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -14,26 +14,22 @@
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
-/// SubOne - Subtract one from a ConstantInt.
-static Constant *SubOne(ConstantInt *C) {
- return ConstantInt::get(C->getContext(), C->getValue()-1);
-}
-
/// MultiplyOverflows - True if the multiply cannot be expressed in an int of
/// this size.
static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
uint32_t W = C1->getBitWidth();
APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
if (sign) {
- LHSExt.sext(W * 2);
- RHSExt.sext(W * 2);
+ LHSExt = LHSExt.sext(W * 2);
+ RHSExt = RHSExt.sext(W * 2);
} else {
- LHSExt.zext(W * 2);
- RHSExt.zext(W * 2);
+ LHSExt = LHSExt.zext(W * 2);
+ RHSExt = RHSExt.zext(W * 2);
}
APInt MulExt = LHSExt * RHSExt;
@@ -47,62 +43,48 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
}
Instruction *InstCombiner::visitMul(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (isa<UndefValue>(Op1)) // undef * X -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ if (Value *V = SimplifyMulInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
- // Simplify mul instructions with a constant RHS.
- if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) {
-
- // ((X << C1)*C2) == (X * (C2 << C1))
- if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
- if (SI->getOpcode() == Instruction::Shl)
- if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
- return BinaryOperator::CreateMul(SI->getOperand(0),
- ConstantExpr::getShl(CI, ShOp));
-
- if (CI->isZero())
- return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0
- if (CI->equalsInt(1)) // X * 1 == X
- return ReplaceInstUsesWith(I, Op0);
- if (CI->isAllOnesValue()) // X * -1 == 0 - X
- return BinaryOperator::CreateNeg(Op0, I.getName());
-
- const APInt& Val = cast<ConstantInt>(CI)->getValue();
- if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
- return BinaryOperator::CreateShl(Op0,
- ConstantInt::get(Op0->getType(), Val.logBase2()));
- }
- } else if (Op1C->getType()->isVectorTy()) {
- if (Op1C->isNullValue())
- return ReplaceInstUsesWith(I, Op1C);
-
- if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
- if (Op1V->isAllOnesValue()) // X * -1 == 0 - X
- return BinaryOperator::CreateNeg(Op0, I.getName());
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
- // As above, vector X*splat(1.0) -> X in all defined cases.
- if (Constant *Splat = Op1V->getSplatValue()) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat))
- if (CI->equalsInt(1))
- return ReplaceInstUsesWith(I, Op0);
- }
- }
+ if (match(Op1, m_AllOnes())) // X * -1 == 0 - X
+ return BinaryOperator::CreateNeg(Op0, I.getName());
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+
+ // ((X << C1)*C2) == (X * (C2 << C1))
+ if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
+ if (SI->getOpcode() == Instruction::Shl)
+ if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
+ return BinaryOperator::CreateMul(SI->getOperand(0),
+ ConstantExpr::getShl(CI, ShOp));
+
+ const APInt &Val = CI->getValue();
+ if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
+ Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
+ BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst);
+ if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
+ if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
+ return Shl;
}
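
The strength reduction itself is the familiar multiply/shift identity; the patch's addition is that nsw/nuw now carry over onto the shl. A minimal C++ sketch (not part of the patch):

    #include <cstdint>
    #include <cstdio>

    int main() {
      for (unsigned X = 0; X <= 255; ++X)
        if ((uint8_t)(X * 8) != (uint8_t)(X << 3)) { puts("bad"); return 1; }
      puts("X * 2^C == X << C on i8, wrapping and all");
      return 0;
    }
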
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
- if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
- isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) {
- // Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
- Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp");
- Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1));
- return BinaryOperator::CreateAdd(Add, C1C2);
-
+ // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
+ { Value *X; ConstantInt *C1;
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) {
+ Value *Add = Builder->CreateMul(X, CI, "tmp");
+ return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI));
}
-
+ }
+ }
+
+ // Simplify mul instructions with a constant RHS.
+ if (isa<Constant>(Op1)) {
// Try to fold constant mul into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -135,8 +117,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
BO->getOpcode() == Instruction::SDiv)) {
Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
- // If the division is exact, X % Y is zero.
- if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO))
+ // If the division is exact, X % Y is zero, so we end up with X or -X.
+ if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO))
if (SDiv->isExact()) {
if (Op1BO == Op1C)
return ReplaceInstUsesWith(I, Op0BO);
@@ -194,7 +176,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// Simplify mul instructions with a constant RHS...
@@ -304,28 +286,6 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
}
-/// This function implements the transforms on div instructions that work
-/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is
-/// used by the visitors to those instructions.
-/// @brief Transforms common to all three div instructions
-Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- // undef / X -> 0 for integer.
- // undef / X -> undef for FP (the undef could be a snan).
- if (isa<UndefValue>(Op0)) {
- if (Op0->getType()->isFPOrFPVectorTy())
- return ReplaceInstUsesWith(I, Op0);
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- }
-
- // X / undef -> undef
- if (isa<UndefValue>(Op1))
- return ReplaceInstUsesWith(I, Op1);
-
- return 0;
-}
-
/// This function implements the transforms common to both integer division
/// instructions (udiv and sdiv). It is called by the visitors to those integer
/// division instructions.
@@ -333,31 +293,12 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- // (sdiv X, X) --> 1 (udiv X, X) --> 1
- if (Op0 == Op1) {
- if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
- Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
- std::vector<Constant*> Elts(Ty->getNumElements(), CI);
- return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
- }
-
- Constant *CI = ConstantInt::get(I.getType(), 1);
- return ReplaceInstUsesWith(I, CI);
- }
-
- if (Instruction *Common = commonDivTransforms(I))
- return Common;
-
// Handle cases involving: [su]div X, (select Cond, Y, Z)
// This does not apply for fdiv.
if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
return &I;
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- // div X, 1 == X
- if (RHS->equalsInt(1))
- return ReplaceInstUsesWith(I, Op0);
-
// (X / C1) / C2 -> X / (C1*C2)
if (Instruction *LHS = dyn_cast<Instruction>(Op0))
if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
@@ -365,9 +306,8 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
if (MultiplyOverflows(RHS, LHSRHS,
I.getOpcode()==Instruction::SDiv))
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- else
- return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
- ConstantExpr::getMul(RHS, LHSRHS));
+ return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
+ ConstantExpr::getMul(RHS, LHSRHS));
}
if (!RHS->isZero()) { // avoid X udiv 0
@@ -380,20 +320,13 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
}
}
- // 0 / X == 0, we don't need to preserve faults!
- if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))
- if (LHS->equalsInt(0))
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // It can't be division by zero, hence it must be division by one.
- if (I.getType()->isIntegerTy(1))
- return ReplaceInstUsesWith(I, Op0);
-
- if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
- if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue()))
- // div X, 1 == X
- if (X->isOne())
- return ReplaceInstUsesWith(I, Op0);
+ // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y
+ Value *X = 0, *Z = 0;
+ if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1
+ bool isSigned = I.getOpcode() == Instruction::SDiv;
+ if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) ||
+ (!isSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1)))))
+ return BinaryOperator::Create(I.getOpcode(), X, Op1);
}
return 0;
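The new sub/rem fold is sound because subtracting X rem Y rounds X down to an exact multiple of Y, so the subsequent division loses nothing. A quick exhaustive check over small unsigned values:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 200; ++x)
        for (uint32_t y = 1; y < 20; ++y)
          assert((x - x % y) / y == x / y);  // subtracting the remainder rounds
                                             // X down to a multiple of Y
      return 0;
    }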
@@ -402,6 +335,9 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyUDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
// Handle the integer div common cases
if (Instruction *Common = commonIDivTransforms(I))
return Common;
@@ -410,60 +346,59 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// X udiv 2^C -> X >> C
// Check to see if this is an unsigned division with an exact power of 2,
// if so, convert to a right shift.
- if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2
- return BinaryOperator::CreateLShr(Op0,
+ if (C->getValue().isPowerOf2()) { // 0 not included in isPowerOf2
+ BinaryOperator *LShr =
+ BinaryOperator::CreateLShr(Op0,
ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
+ if (I.isExact()) LShr->setIsExact();
+ return LShr;
+ }
// X udiv C, where C >= signbit
if (C->getValue().isNegative()) {
- Value *IC = Builder->CreateICmpULT( Op0, C);
+ Value *IC = Builder->CreateICmpULT(Op0, C);
return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
ConstantInt::get(I.getType(), 1));
}
}
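The select emitted for the C >= signbit case works because a w-bit divisor with its top bit set fits at most once into any w-bit dividend, so the quotient collapses to a compare. A standalone check at 8 bits, with an arbitrarily chosen divisor:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t C = 0x90;  // any divisor with the sign bit set
      for (unsigned x = 0; x < 256; ++x)
        assert(uint8_t(x) / C == (uint8_t(x) < C ? 0u : 1u));
      return 0;
    }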
// X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
- if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) {
- if (RHSI->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(RHSI->getOperand(0))) {
- const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue();
- if (C1.isPowerOf2()) {
- Value *N = RHSI->getOperand(1);
- const Type *NTy = N->getType();
- if (uint32_t C2 = C1.logBase2())
- N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp");
- return BinaryOperator::CreateLShr(Op0, N);
- }
+ { const APInt *CI; Value *N;
+ if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) {
+ if (*CI != 1)
+ N = Builder->CreateAdd(N, ConstantInt::get(I.getType(), CI->logBase2()),
+ "tmp");
+ if (I.isExact())
+ return BinaryOperator::CreateExactLShr(Op0, N);
+ return BinaryOperator::CreateLShr(Op0, N);
}
}
// udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
// where C1&C2 are powers of two.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
- if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
- if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
- const APInt &TVA = STO->getValue(), &FVA = SFO->getValue();
- if (TVA.isPowerOf2() && FVA.isPowerOf2()) {
- // Compute the shift amounts
- uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
- // Construct the "on true" case of the select
- Constant *TC = ConstantInt::get(Op0->getType(), TSA);
- Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t");
+ { Value *Cond; const APInt *C1, *C2;
+ if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
+ // Construct the "on true" case of the select
+ Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
+ I.isExact());
- // Construct the "on false" case of the select
- Constant *FC = ConstantInt::get(Op0->getType(), FSA);
- Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f");
-
- // construct the select instruction and return it.
- return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName());
- }
- }
+ // Construct the "on false" case of the select
+ Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
+ I.isExact());
+
+ // construct the select instruction and return it.
+ return SelectInst::Create(Cond, TSI, FSI);
+ }
+ }
return 0;
}
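The shl-divisor rewrite above folds the two exponents: dividing by (1 << C2) << N is a right shift by N+C2. A small check with C2 = 2 (so C1 = 4), other values arbitrary:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 4096; ++x)
        for (uint32_t n = 0; n < 8; ++n)
          assert(x / (4u << n) == x >> (n + 2));  // divisor 4<<n is 2^(n+2)
      return 0;
    }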
Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifySDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
// Handle the integer div common cases
if (Instruction *Common = commonIDivTransforms(I))
return Common;
@@ -473,20 +408,17 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
if (RHS->isAllOnesValue())
return BinaryOperator::CreateNeg(Op0);
- // sdiv X, C --> ashr X, log2(C)
- if (cast<SDivOperator>(&I)->isExact() &&
- RHS->getValue().isNonNegative() &&
+ // sdiv X, C --> ashr exact X, log2(C)
+ if (I.isExact() && RHS->getValue().isNonNegative() &&
RHS->getValue().isPowerOf2()) {
Value *ShAmt = llvm::ConstantInt::get(RHS->getType(),
RHS->getValue().exactLogBase2());
- return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName());
+ return BinaryOperator::CreateExactAShr(Op0, ShAmt, I.getName());
}
// -X/C --> X/-C provided the negation doesn't overflow.
if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
- if (isa<Constant>(Sub->getOperand(0)) &&
- cast<Constant>(Sub->getOperand(0))->isNullValue() &&
- Sub->hasNoSignedWrap())
+ if (match(Sub->getOperand(0), m_Zero()) && Sub->hasNoSignedWrap())
return BinaryOperator::CreateSDiv(Sub->getOperand(1),
ConstantExpr::getNeg(RHS));
}
@@ -500,9 +432,8 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
// X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
}
- ConstantInt *ShiftedInt;
- if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) &&
- ShiftedInt->getValue().isPowerOf2()) {
+
+ if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
// INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
@@ -516,7 +447,12 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
}
Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
- return commonDivTransforms(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ return 0;
}
/// This function implements the transforms on rem instructions that work
@@ -551,6 +487,10 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
if (Instruction *common = commonRemTransforms(I))
return common;
+ // X % X == 0
+ if (Op0 == Op1)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
// 0 % X == 0 for integer, we don't need to preserve faults!
if (Constant *LHS = dyn_cast<Constant>(Op0))
if (LHS->isNullValue())
@@ -588,42 +528,29 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
if (Instruction *common = commonIRemTransforms(I))
return common;
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- // X urem C^2 -> X and C
- // Check to see if this is an unsigned remainder with an exact power of 2,
- // if so, convert to a bitwise and.
- if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
- if (C->getValue().isPowerOf2())
- return BinaryOperator::CreateAnd(Op0, SubOne(C));
+  // X urem 2^C -> X and (2^C - 1)
+ { const APInt *C;
+ if (match(Op1, m_Power2(C)))
+ return BinaryOperator::CreateAnd(Op0,
+ ConstantInt::get(I.getType(), *C-1));
}
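This is the usual mask identity for unsigned remainders by a power of two; a quick check with 2^C = 32, chosen arbitrarily:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 1000; ++x)
        assert(x % 32u == (x & 31u));  // X urem 2^C == X & (2^C - 1)
      return 0;
    }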
- if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
- // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
- if (RHSI->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(RHSI->getOperand(0))) {
- if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
- Constant *N1 = Constant::getAllOnesValue(I.getType());
- Value *Add = Builder->CreateAdd(RHSI, N1, "tmp");
- return BinaryOperator::CreateAnd(Op0, Add);
- }
- }
+ // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
+ if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
+ Constant *N1 = Constant::getAllOnesValue(I.getType());
+ Value *Add = Builder->CreateAdd(Op1, N1, "tmp");
+ return BinaryOperator::CreateAnd(Op0, Add);
}
- // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (and X, C1), (and X, C2)
- // where C1&C2 are powers of two.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
- if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
- if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
- // STO == 0 and SFO == 0 handled above.
- if ((STO->getValue().isPowerOf2()) &&
- (SFO->getValue().isPowerOf2())) {
- Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO),
- SI->getName()+".t");
- Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO),
- SI->getName()+".f");
- return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
- }
- }
+  // urem X, (select Cond, 2^C1, 2^C2) -->
+  //   select Cond, (and X, 2^C1-1), (and X, 2^C2-1)
+  // when both select arms are powers of two.
+ { Value *Cond; const APInt *C1, *C2;
+ if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
+ Value *TrueAnd = Builder->CreateAnd(Op0, *C1-1, Op1->getName()+".t");
+ Value *FalseAnd = Builder->CreateAnd(Op0, *C2-1, Op1->getName()+".f");
+ return SelectInst::Create(Cond, TrueAnd, FalseAnd);
+ }
}
return 0;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index f7fc62f..297a18c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
@@ -30,22 +31,37 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
const Type *LHSType = LHSVal->getType();
const Type *RHSType = RHSVal->getType();
+ bool isNUW = false, isNSW = false, isExact = false;
+ if (OverflowingBinaryOperator *BO =
+ dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
+ isNUW = BO->hasNoUnsignedWrap();
+ isNSW = BO->hasNoSignedWrap();
+ } else if (PossiblyExactOperator *PEO =
+ dyn_cast<PossiblyExactOperator>(FirstInst))
+ isExact = PEO->isExact();
+
// Scan to see if all operands are the same opcode, and all have one use.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
// Verify type of the LHS matches so we don't fold cmp's of different
- // types or GEP's with different index types.
+ // types.
I->getOperand(0)->getType() != LHSType ||
I->getOperand(1)->getType() != RHSType)
return 0;
// If they are CmpInst instructions, check their predicates
- if (Opc == Instruction::ICmp || Opc == Instruction::FCmp)
- if (cast<CmpInst>(I)->getPredicate() !=
- cast<CmpInst>(FirstInst)->getPredicate())
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
return 0;
+ if (isNUW)
+ isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
+ if (isNSW)
+ isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ if (isExact)
+ isExact = cast<PossiblyExactOperator>(I)->isExact();
+
// Keep track of which operand needs a phi node.
if (I->getOperand(0) != LHSVal) LHSVal = 0;
if (I->getOperand(1) != RHSVal) RHSVal = 0;
@@ -96,11 +112,17 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
}
}
- if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
- return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
- CmpInst *CIOp = cast<CmpInst>(FirstInst);
- return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
- LHSVal, RHSVal);
+ if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ LHSVal, RHSVal);
+
+ BinaryOperator *BinOp = cast<BinaryOperator>(FirstInst);
+ BinaryOperator *NewBinOp =
+ BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
+ if (isNUW) NewBinOp->setHasNoUnsignedWrap();
+ if (isNSW) NewBinOp->setHasNoSignedWrap();
+ if (isExact) NewBinOp->setIsExact();
+ return NewBinOp;
}
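The flag bookkeeping above is an intersection: nuw, nsw, or exact survives onto the merged instruction only if every incoming instruction carried it. A sketch of just that idea, with a hypothetical Op struct standing in for the LLVM operator classes:

    #include <cassert>
    #include <vector>

    struct Op { bool nuw, nsw, exact; };  // hypothetical stand-in, not llvm::*

    static Op mergeFlags(const std::vector<Op> &Incoming) {
      Op R = {true, true, true};
      for (const Op &O : Incoming) {  // one flagless input drops the flag
        R.nuw &= O.nuw;
        R.nsw &= O.nsw;
        R.exact &= O.exact;
      }
      return R;
    }

    int main() {
      Op Merged = mergeFlags({{true, true, false}, {true, false, false}});
      assert(Merged.nuw && !Merged.nsw && !Merged.exact);
      return 0;
    }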
Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
@@ -117,6 +139,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// especially bad when the PHIs are in the header of a loop.
bool NeededPhi = false;
+ bool AllInBounds = true;
+
// Scan to see if all operands are the same opcode, and all have one use.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
@@ -124,6 +148,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
GEP->getNumOperands() != FirstInst->getNumOperands())
return 0;
+ AllInBounds &= GEP->isInBounds();
+
// Keep track of whether or not all GEPs are of alloca pointers.
if (AllBasePointersAreAllocas &&
(!isa<AllocaInst>(GEP->getOperand(0)) ||
@@ -201,11 +227,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
}
Value *Base = FixedOperands[0];
- return cast<GEPOperator>(FirstInst)->isInBounds() ?
- GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1,
- FixedOperands.end()) :
+ GetElementPtrInst *NewGEP =
GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
FixedOperands.end());
+ if (AllInBounds) NewGEP->setIsInBounds();
+ return NewGEP;
}
@@ -368,6 +394,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// code size and simplifying code.
Constant *ConstantOp = 0;
const Type *CastSrcTy = 0;
+ bool isNUW = false, isNSW = false, isExact = false;
if (isa<CastInst>(FirstInst)) {
CastSrcTy = FirstInst->getOperand(0)->getType();
@@ -384,6 +411,14 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
if (ConstantOp == 0)
return FoldPHIArgBinOpIntoPHI(PN);
+
+ if (OverflowingBinaryOperator *BO =
+ dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
+ isNUW = BO->hasNoUnsignedWrap();
+ isNSW = BO->hasNoSignedWrap();
+ } else if (PossiblyExactOperator *PEO =
+ dyn_cast<PossiblyExactOperator>(FirstInst))
+ isExact = PEO->isExact();
} else {
return 0; // Cannot fold this operation.
}
@@ -399,6 +434,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
} else if (I->getOperand(1) != ConstantOp) {
return 0;
}
+
+ if (isNUW)
+ isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
+ if (isNSW)
+ isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ if (isExact)
+ isExact = cast<PossiblyExactOperator>(I)->isExact();
}
// Okay, they are all the same operation. Create a new PHI node of the
@@ -433,8 +475,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst))
return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
- if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
- return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
+ BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+ if (isNUW) BinOp->setHasNoUnsignedWrap();
+ if (isNSW) BinOp->setHasNoSignedWrap();
+ if (isExact) BinOp->setIsExact();
+ return BinOp;
+ }
CmpInst *CIOp = cast<CmpInst>(FirstInst);
return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
@@ -731,8 +778,8 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// If LCSSA is around, don't mess with Phi nodes
if (MustPreserveLCSSA) return 0;
-
- if (Value *V = PN.hasConstantValue())
+
+ if (Value *V = SimplifyInstruction(&PN, TD))
return ReplaceInstUsesWith(PN, V);
// If all PHI operands are the same operation, pull them through the PHI,
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index c44fe9d..97abc76 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -24,14 +24,14 @@ static SelectPatternFlavor
MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
SelectInst *SI = dyn_cast<SelectInst>(V);
if (SI == 0) return SPF_UNKNOWN;
-
+
ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
if (ICI == 0) return SPF_UNKNOWN;
-
+
LHS = ICI->getOperand(0);
RHS = ICI->getOperand(1);
-
- // (icmp X, Y) ? X : Y
+
+ // (icmp X, Y) ? X : Y
if (SI->getTrueValue() == ICI->getOperand(0) &&
SI->getFalseValue() == ICI->getOperand(1)) {
switch (ICI->getPredicate()) {
@@ -46,8 +46,8 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
case ICmpInst::ICMP_SLE: return SPF_SMIN;
}
}
-
- // (icmp X, Y) ? Y : X
+
+ // (icmp X, Y) ? Y : X
if (SI->getTrueValue() == ICI->getOperand(1) &&
SI->getFalseValue() == ICI->getOperand(0)) {
switch (ICI->getPredicate()) {
@@ -62,9 +62,9 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
case ICmpInst::ICMP_SLE: return SPF_SMAX;
}
}
-
+
// TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
-
+
return SPF_UNKNOWN;
}
@@ -136,7 +136,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
FI->getOperand(0), SI.getName()+".v");
InsertNewInstBefore(NewSI, SI);
- return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
+ return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
TI->getType());
}
@@ -195,7 +195,10 @@ static bool isSelect01(Constant *C1, Constant *C2) {
ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
if (!C2I)
return false;
- return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne());
+ if (!C1I->isZero() && !C2I->isZero()) // One side must be zero.
+ return false;
+ return C1I->isOne() || C1I->isAllOnesValue() ||
+ C2I->isOne() || C2I->isAllOnesValue();
}
/// FoldSelectIntoOp - Try fold the select into one of the operands to
@@ -219,7 +222,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
Constant *C = GetSelectFoldableConstant(TVI);
Value *OOp = TVI->getOperand(2-OpToFold);
// Avoid creating select between 2 constants unless it's selecting
- // between 0 and 1.
+      // between 0, 1, and -1.
if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C);
InsertNewInstBefore(NewSel, SI);
@@ -248,7 +251,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
Constant *C = GetSelectFoldableConstant(FVI);
Value *OOp = FVI->getOperand(2-OpToFold);
// Avoid creating select between 2 constants unless it's selecting
- // between 0 and 1.
+      // between 0, 1, and -1.
if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp);
InsertNewInstBefore(NewSel, SI);
@@ -278,52 +281,95 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
Value *FalseVal = SI.getFalseValue();
// Check cases where the comparison is with a constant that
- // can be adjusted to fit the min/max idiom. We may edit ICI in
- // place here, so make sure the select is the only user.
+ // can be adjusted to fit the min/max idiom. We may move or edit ICI
+ // here, so make sure the select is the only user.
if (ICI->hasOneUse())
if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
+ // X < MIN ? T : F --> F
+ if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT)
+ && CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // X > MAX ? T : F --> F
+ else if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT)
+ && CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
+ return ReplaceInstUsesWith(SI, FalseVal);
switch (Pred) {
default: break;
case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT: {
- // X < MIN ? T : F --> F
- if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
- return ReplaceInstUsesWith(SI, FalseVal);
- // X < C ? X : C-1 --> X > C-1 ? C-1 : X
- Constant *AdjustedRHS =
- ConstantInt::get(CI->getContext(), CI->getValue()-1);
- if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
- (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
- Pred = ICmpInst::getSwappedPredicate(Pred);
- CmpRHS = AdjustedRHS;
- std::swap(FalseVal, TrueVal);
- ICI->setPredicate(Pred);
- ICI->setOperand(1, CmpRHS);
- SI.setOperand(1, TrueVal);
- SI.setOperand(2, FalseVal);
- Changed = true;
- }
- break;
- }
+ case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_SGT: {
- // X > MAX ? T : F --> F
- if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
- return ReplaceInstUsesWith(SI, FalseVal);
+ // These transformations only work for selects over integers.
+ const IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType());
+ if (!SelectTy)
+ break;
+
+ Constant *AdjustedRHS;
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT)
+ AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() + 1);
+ else // (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT)
+ AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() - 1);
+
// X > C ? X : C+1 --> X < C+1 ? C+1 : X
- Constant *AdjustedRHS =
- ConstantInt::get(CI->getContext(), CI->getValue()+1);
+ // X < C ? X : C-1 --> X > C-1 ? C-1 : X
if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
- (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
- Pred = ICmpInst::getSwappedPredicate(Pred);
- CmpRHS = AdjustedRHS;
- std::swap(FalseVal, TrueVal);
- ICI->setPredicate(Pred);
- ICI->setOperand(1, CmpRHS);
- SI.setOperand(1, TrueVal);
- SI.setOperand(2, FalseVal);
- Changed = true;
- }
+ (CmpLHS == FalseVal && AdjustedRHS == TrueVal))
+ ; // Nothing to do here. Values match without any sign/zero extension.
+
+      // Types do not match. Instead of calculating this with mixed types,
+ // promote all to the larger type. This enables scalar evolution to
+ // analyze this expression.
+ else if (CmpRHS->getType()->getScalarSizeInBits()
+ < SelectTy->getBitWidth()) {
+ Constant *sextRHS = ConstantExpr::getSExt(AdjustedRHS, SelectTy);
+
+ // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X
+ // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X
+ // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X
+ // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X
+ if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) &&
+ sextRHS == FalseVal) {
+ CmpLHS = TrueVal;
+ AdjustedRHS = sextRHS;
+ } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) &&
+ sextRHS == TrueVal) {
+ CmpLHS = FalseVal;
+ AdjustedRHS = sextRHS;
+ } else if (ICI->isUnsigned()) {
+ Constant *zextRHS = ConstantExpr::getZExt(AdjustedRHS, SelectTy);
+ // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X
+ // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X
+ // zext + signed compare cannot be changed:
+ // 0xff <s 0x00, but 0x00ff >s 0x0000
+ if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) &&
+ zextRHS == FalseVal) {
+ CmpLHS = TrueVal;
+ AdjustedRHS = zextRHS;
+ } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) &&
+ zextRHS == TrueVal) {
+ CmpLHS = FalseVal;
+ AdjustedRHS = zextRHS;
+ } else
+ break;
+ } else
+ break;
+ } else
+ break;
+
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ CmpRHS = AdjustedRHS;
+ std::swap(FalseVal, TrueVal);
+ ICI->setPredicate(Pred);
+ ICI->setOperand(0, CmpLHS);
+ ICI->setOperand(1, CmpRHS);
+ SI.setOperand(1, TrueVal);
+ SI.setOperand(2, FalseVal);
+
+ // Move ICI instruction right before the select instruction. Otherwise
+ // the sext/zext value may be defined after the ICI instruction uses it.
+ ICI->moveBefore(&SI);
+
+ Changed = true;
break;
}
}
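The zext caveat in the comment above is easy to reproduce: a signed compare does not survive zero-extension, which is why only the unsigned predicates are rewritten through a zext. A standalone check mirroring the 0xff example:

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t x = int8_t(0xff);           // -1 as a signed byte
      int16_t zx = int16_t(uint8_t(x));  // zext to 16 bits: 0x00ff == 255
      assert(x < 0);                     // 0xff <s 0x00 holds at 8 bits...
      assert(!(zx < 0));                 // ...but 0x00ff >s 0x0000 after zext
      return 0;
    }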
@@ -399,28 +445,28 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
// can always be mapped.
const Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return true;
-
+
// If V is a PHI node defined in the same block as the condition PHI, we can
// map the arguments.
const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
-
+
if (const PHINode *VP = dyn_cast<PHINode>(I))
if (VP->getParent() == CondPHI->getParent())
return true;
-
+
// Otherwise, if the PHI and select are defined in the same block and if V is
// defined in a different block, then we can transform it.
if (SI.getParent() == CondPHI->getParent() &&
I->getParent() != CondPHI->getParent())
return true;
-
+
// Otherwise we have a 'hard' case and we can't tell without doing more
// detailed dominator based analysis, punt.
return false;
}
/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form:
-/// SPF2(SPF1(A, B), C)
+/// SPF2(SPF1(A, B), C)
Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
SelectPatternFlavor SPF1,
Value *A, Value *B,
@@ -431,7 +477,7 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
// MIN(MIN(a, b), a) -> MIN(a, b)
if (SPF1 == SPF2)
return ReplaceInstUsesWith(Outer, Inner);
-
+
// MAX(MIN(a, b), a) -> a
// MIN(MAX(a, b), a) -> a
if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
@@ -440,13 +486,82 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
(SPF1 == SPF_UMAX && SPF2 == SPF_UMIN))
return ReplaceInstUsesWith(Outer, C);
}
-
+
// TODO: MIN(MIN(A, 23), 97)
return 0;
}
+/// foldSelectICmpAnd - If one of the constants is zero (we know they can't
+/// both be) and we have an icmp instruction with zero, and we have an 'and'
+/// with the non-constant value and a power of two, we can turn the select
+/// into a shift on the result of the 'and'.
+static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
+ ConstantInt *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
+ if (!IC || !IC->isEquality())
+ return 0;
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(IC->getOperand(1)))
+ if (!C->isZero())
+ return 0;
+ ConstantInt *AndRHS;
+ Value *LHS = IC->getOperand(0);
+ if (LHS->getType() != SI.getType() ||
+ !match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
+ return 0;
+
+  // If both select arms are non-zero, see if we have a select of the form
+ // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic
+ // for 'x ? 2^n : 0' and fix the thing up at the end.
+ ConstantInt *Offset = 0;
+ if (!TrueVal->isZero() && !FalseVal->isZero()) {
+ if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2())
+ Offset = FalseVal;
+ else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2())
+ Offset = TrueVal;
+ else
+ return 0;
+
+ // Adjust TrueVal and FalseVal to the offset.
+ TrueVal = ConstantInt::get(Builder->getContext(),
+ TrueVal->getValue() - Offset->getValue());
+ FalseVal = ConstantInt::get(Builder->getContext(),
+ FalseVal->getValue() - Offset->getValue());
+ }
+
+ // Make sure the mask in the 'and' and one of the select arms is a power of 2.
+ if (!AndRHS->getValue().isPowerOf2() ||
+ (!TrueVal->getValue().isPowerOf2() &&
+ !FalseVal->getValue().isPowerOf2()))
+ return 0;
+
+ // Determine which shift is needed to transform result of the 'and' into the
+ // desired result.
+ ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal;
+ unsigned ValZeros = ValC->getValue().logBase2();
+ unsigned AndZeros = AndRHS->getValue().logBase2();
+
+ Value *V = LHS;
+ if (ValZeros > AndZeros)
+ V = Builder->CreateShl(V, ValZeros - AndZeros);
+ else if (ValZeros < AndZeros)
+ V = Builder->CreateLShr(V, AndZeros - ValZeros);
+
+  // Okay, now we know that everything is set up; we just don't know whether
+  // we have an icmp_ne or icmp_eq and whether the true or false value is the
+  // zero.
+ bool ShouldNotVal = !TrueVal->isZero();
+ ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
+ if (ShouldNotVal)
+ V = Builder->CreateXor(V, ValC);
+
+ // Apply an offset if needed.
+ if (Offset)
+ V = Builder->CreateAdd(V, Offset);
+ return V;
+}
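One concrete instance of the pattern this helper targets: ((x & 4) != 0) ? 16 : 0 just moves bit 2 up to bit 4. A standalone check of that equivalence, with arbitrarily chosen constants:

    #include <cassert>
    #include <cstdint>

    static uint32_t viaSelect(uint32_t x) { return (x & 4u) ? 16u : 0u; }
    static uint32_t viaShift(uint32_t x)  { return (x & 4u) << 2; }  // bit 2 -> bit 4

    int main() {
      for (uint32_t x = 0; x < 256; ++x)
        assert(viaSelect(x) == viaShift(x));
      return 0;
    }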
Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
@@ -478,7 +593,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
"not."+CondVal->getName()), SI);
return BinaryOperator::CreateOr(NotCond, TrueVal);
}
-
+
// select a, b, a -> a&b
// select a, a, b -> a|b
if (CondVal == TrueVal)
@@ -497,7 +612,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// select C, -1, 0 -> sext C to int
if (FalseValC->isZero() && TrueValC->isAllOnesValue())
return new SExtInst(CondVal, SI.getType());
-
+
// select C, 0, 1 -> zext !C to int
if (TrueValC->isZero() && FalseValC->getValue() == 1) {
Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
@@ -509,32 +624,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
return new SExtInst(NotCond, SI.getType());
}
-
- if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) {
- // If one of the constants is zero (we know they can't both be) and we
- // have an icmp instruction with zero, and we have an 'and' with the
- // non-constant value, eliminate this whole mess. This corresponds to
- // cases like this: ((X & 27) ? 27 : 0)
- if (TrueValC->isZero() || FalseValC->isZero())
- if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) &&
- cast<Constant>(IC->getOperand(1))->isNullValue())
- if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0)))
- if (ICA->getOpcode() == Instruction::And &&
- isa<ConstantInt>(ICA->getOperand(1)) &&
- (ICA->getOperand(1) == TrueValC ||
- ICA->getOperand(1) == FalseValC) &&
- cast<ConstantInt>(ICA->getOperand(1))->getValue().isPowerOf2()) {
- // Okay, now we know that everything is set up, we just don't
- // know whether we have a icmp_ne or icmp_eq and whether the
- // true or false val is the zero.
- bool ShouldNotVal = !TrueValC->isZero();
- ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
- Value *V = ICA;
- if (ShouldNotVal)
- V = Builder->CreateXor(V, ICA->getOperand(1));
- return ReplaceInstUsesWith(SI, V);
- }
- }
+
+ if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder))
+ return ReplaceInstUsesWith(SI, V);
}
// See if we are selecting two values based on a comparison of the two values.
@@ -542,7 +634,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
// Transform (X == Y) ? X : Y -> Y
if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
- // This is not safe in general for floating point:
+ // This is not safe in general for floating point:
// consider X== -0, Y== +0.
// It becomes safe if either operand is a nonzero constant.
ConstantFP *CFPt, *CFPf;
@@ -554,7 +646,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// Transform (X une Y) ? X : Y -> X
if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
- // This is not safe in general for floating point:
+ // This is not safe in general for floating point:
// consider X== -0, Y== +0.
// It becomes safe if either operand is a nonzero constant.
ConstantFP *CFPt, *CFPf;
@@ -569,7 +661,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
} else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
// Transform (X == Y) ? Y : X -> X
if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
- // This is not safe in general for floating point:
+ // This is not safe in general for floating point:
// consider X== -0, Y== +0.
// It becomes safe if either operand is a nonzero constant.
ConstantFP *CFPt, *CFPf;
@@ -581,7 +673,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// Transform (X une Y) ? Y : X -> Y
if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
- // This is not safe in general for floating point:
+ // This is not safe in general for floating point:
// consider X== -0, Y== +0.
// It becomes safe if either operand is a nonzero constant.
ConstantFP *CFPt, *CFPf;
@@ -639,6 +731,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *NegVal; // Compute -Z
if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) {
NegVal = ConstantExpr::getNeg(C);
+ } else if (SI.getType()->isFloatingPointTy()) {
+ NegVal = InsertNewInstBefore(
+ BinaryOperator::CreateFNeg(SubOp->getOperand(1),
+ "tmp"), SI);
} else {
NegVal = InsertNewInstBefore(
BinaryOperator::CreateNeg(SubOp->getOperand(1),
@@ -654,7 +750,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
NewFalseOp, SI.getName() + ".p");
NewSel = InsertNewInstBefore(NewSel, SI);
- return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
+ if (SI.getType()->isFloatingPointTy())
+ return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);
+ else
+ return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
}
}
}
@@ -663,7 +762,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (SI.getType()->isIntegerTy()) {
if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
return FoldI;
-
+
// MAX(MAX(a, b), a) -> MAX(a, b)
// MIN(MIN(a, b), a) -> MIN(a, b)
// MAX(MIN(a, b), a) -> a
@@ -686,13 +785,26 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// See if we can fold the select into a phi node if the condition is a select.
- if (isa<PHINode>(SI.getCondition()))
+ if (isa<PHINode>(SI.getCondition()))
// The true/false values have to be live in the PHI predecessor's blocks.
if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
if (Instruction *NV = FoldOpIntoPhi(SI))
return NV;
+ if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
+ if (TrueSI->getCondition() == CondVal) {
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
+ }
+ if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
+ if (FalseSI->getCondition() == CondVal) {
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
+ }
+
if (BinaryOperator::isNot(CondVal)) {
SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
SI.setOperand(1, FalseVal);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 27716b8..a7f8005 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -13,6 +13,7 @@
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -21,25 +22,6 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- // shl X, 0 == X and shr X, 0 == X
- // shl 0, X == 0 and shr 0, X == 0
- if (Op1 == Constant::getNullValue(Op1->getType()) ||
- Op0 == Constant::getNullValue(Op0->getType()))
- return ReplaceInstUsesWith(I, Op0);
-
- if (isa<UndefValue>(Op0)) {
- if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
- return ReplaceInstUsesWith(I, Op0);
- else // undef << X -> 0, undef >>u X -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- }
- if (isa<UndefValue>(Op1)) {
- if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X
- return ReplaceInstUsesWith(I, Op0);
- else // X << undef, X >>u undef -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- }
-
// See if we can fold away this shift.
if (SimplifyDemandedInstructionBits(I))
return &I;
@@ -53,6 +35,20 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
return Res;
+
+  // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2,
+  // because shifts by negative values (which could occur if A were negative)
+  // are undefined.
+ Value *A; const APInt *B;
+ if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) {
+ // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
+ // demand the sign bit (and many others) here??
+ Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1),
+ Op1->getName());
+ I.setOperand(1, Rem);
+ return &I;
+ }
+
return 0;
}
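The srem-to-and rewrite is the same mask identity restricted to non-negative A; a negative A would make the original shift amount negative, which is already undefined, so nothing is lost. A quick check for B = 8:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t a = 0; a < 1000; ++a)
        assert(a % 8 == (a & 7));  // holds for A >= 0; negative A would feed
                                   // the shift a negative amount anyway
      return 0;
    }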
@@ -81,7 +77,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
// if the needed bits are already zero in the input. This allows us to reuse
// the value which means that we don't care if the shift has multiple uses.
// TODO: Handle opposite shift by exact value.
- ConstantInt *CI;
+ ConstantInt *CI = 0;
if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
(!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
if (CI->getZExtValue() == NumBits) {
@@ -131,9 +127,9 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
// We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
// profitable unless we know the and'd out bits are already zero.
if (CI->getZExtValue() > NumBits) {
- unsigned HighBits = CI->getZExtValue() - NumBits;
+ unsigned LowBits = TypeWidth - CI->getZExtValue();
if (MaskedValueIsZero(I->getOperand(0),
- APInt::getHighBitsSet(TypeWidth, HighBits)))
+ APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
return true;
}
@@ -157,7 +153,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
if (CI->getZExtValue() > NumBits) {
unsigned LowBits = CI->getZExtValue() - NumBits;
if (MaskedValueIsZero(I->getOperand(0),
- APInt::getLowBitsSet(TypeWidth, LowBits)))
+ APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
return true;
}
@@ -622,16 +618,49 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
}
Instruction *InstCombiner::visitShl(BinaryOperator &I) {
- return commonShiftTransforms(I);
+ if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1),
+ I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
+ TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Instruction *V = commonShiftTransforms(I))
+ return V;
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
+ // If the shifted-out value is known-zero, then this is a NUW shift.
+ if (!I.hasNoUnsignedWrap() &&
+ MaskedValueIsZero(I.getOperand(0),
+ APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) {
+ I.setHasNoUnsignedWrap();
+ return &I;
+ }
+
+    // If the shifted-out value is all sign bits, this is an NSW shift.
+ if (!I.hasNoSignedWrap() &&
+ ComputeNumSignBits(I.getOperand(0)) > ShAmt) {
+ I.setHasNoSignedWrap();
+ return &I;
+ }
+ }
+
+ return 0;
}
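The nuw inference above says a shift that loses no set bits cannot wrap, i.e. it is reversible. A quick check for a 4-bit shift of values whose top four bits are clear (step size arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < (1u << 28); x += 9973) {
        uint32_t y = x << 4;    // top 4 bits of x are zero, so nothing is
        assert((y >> 4) == x);  // shifted out and the shift cannot wrap
      }
      return 0;
    }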
Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
+ if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1),
+ I.isExact(), TD))
+ return ReplaceInstUsesWith(I, V);
+
if (Instruction *R = commonShiftTransforms(I))
return R;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1))
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) {
unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
// ctlz.i32(x)>>5 --> zext(x == 0)
@@ -640,7 +669,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
if ((II->getIntrinsicID() == Intrinsic::ctlz ||
II->getIntrinsicID() == Intrinsic::cttz ||
II->getIntrinsicID() == Intrinsic::ctpop) &&
- isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){
+ isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt) {
bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop;
Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0);
Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
@@ -648,29 +677,37 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
}
}
+ // If the shifted-out value is known-zero, then this is an exact shift.
+ if (!I.isExact() &&
+ MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+ I.setIsExact();
+ return &I;
+ }
+ }
+
return 0;
}
Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
+ if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1),
+ I.isExact(), TD))
+ return ReplaceInstUsesWith(I, V);
+
if (Instruction *R = commonShiftTransforms(I))
return R;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) {
- // ashr int -1, X = -1 (for any arithmetic shift rights of ~0)
- if (CSI->isAllOnesValue())
- return ReplaceInstUsesWith(I, CSI);
- }
-
+
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
// If the input is a SHL by the same constant (ashr (shl X, C), C), then we
// have a sign-extend idiom.
Value *X;
if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) {
- // If the input value is known to already be sign extended enough, delete
- // the extension.
- if (ComputeNumSignBits(X) > Op1C->getZExtValue())
+ // If the left shift is just shifting out partial signbits, delete the
+ // extension.
+ if (cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
return ReplaceInstUsesWith(I, X);
// If the input is an extension from the shifted amount value, e.g.
@@ -685,6 +722,13 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
return new SExtInst(ZI->getOperand(0), ZI->getType());
}
}
+
+ // If the shifted-out value is known-zero, then this is an exact shift.
+ if (!I.isExact() &&
+ MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+ I.setIsExact();
+ return &I;
+ }
}
// See if we can turn a signed shr into an unsigned shr.
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index adf7a76..bda8cea 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -34,7 +34,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
if (!OpC) return false;
// If there are no bits set that aren't demanded, nothing to do.
- Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
+ Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
if ((~Demanded & OpC->getValue()) == 0)
return false;
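These hunks follow an LLVM API migration visible throughout the file: the APInt resizing helpers (zext, trunc, zextOrTrunc) now return a fresh value instead of resizing in place, and the bit mutators were renamed (set to setBit, clear to clearBit/clearAllBits), hence the explicit reassignments below. A sketch of the return-a-copy convention with a hypothetical stand-in type, not the real llvm::APInt:

    #include <cassert>
    #include <cstdint>

    struct Bits {
      uint64_t v; unsigned w;
      Bits zextOrTrunc(unsigned nw) const {  // returns a copy; *this untouched
        uint64_t m = nw >= 64 ? ~0ULL : ((1ULL << nw) - 1);
        return Bits{v & m, nw};
      }
    };

    int main() {
      Bits d{0xff, 8};
      d = d.zextOrTrunc(4);  // the result must be reassigned
      assert(d.v == 0x0f && d.w == 4);
      return 0;
    }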
@@ -121,13 +121,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
if (isa<ConstantPointerNull>(V)) {
// We know all of the bits for a constant!
- KnownOne.clear();
+ KnownOne.clearAllBits();
KnownZero = DemandedMask;
return 0;
}
- KnownZero.clear();
- KnownOne.clear();
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
if (DemandedMask == 0) { // Not demanding any bits from V.
if (isa<UndefValue>(V))
return 0;
@@ -388,15 +388,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::Trunc: {
unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
- DemandedMask.zext(truncBf);
- KnownZero.zext(truncBf);
- KnownOne.zext(truncBf);
+ DemandedMask = DemandedMask.zext(truncBf);
+ KnownZero = KnownZero.zext(truncBf);
+ KnownOne = KnownOne.zext(truncBf);
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
KnownZero, KnownOne, Depth+1))
return I;
- DemandedMask.trunc(BitWidth);
- KnownZero.trunc(BitWidth);
- KnownOne.trunc(BitWidth);
+ DemandedMask = DemandedMask.trunc(BitWidth);
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
break;
}
@@ -426,15 +426,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Compute the bits in the result that are not present in the input.
unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
- DemandedMask.trunc(SrcBitWidth);
- KnownZero.trunc(SrcBitWidth);
- KnownOne.trunc(SrcBitWidth);
+ DemandedMask = DemandedMask.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
KnownZero, KnownOne, Depth+1))
return I;
- DemandedMask.zext(BitWidth);
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ DemandedMask = DemandedMask.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// The top bits are known to be zero.
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
@@ -451,17 +451,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If any of the sign extended bits are demanded, we know that the sign
// bit is demanded.
if ((NewBits & DemandedMask) != 0)
- InputDemandedBits.set(SrcBitWidth-1);
+ InputDemandedBits.setBit(SrcBitWidth-1);
- InputDemandedBits.trunc(SrcBitWidth);
- KnownZero.trunc(SrcBitWidth);
- KnownOne.trunc(SrcBitWidth);
+ InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
KnownZero, KnownOne, Depth+1))
return I;
- InputDemandedBits.zext(BitWidth);
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ InputDemandedBits = InputDemandedBits.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
@@ -576,8 +576,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::Shl:
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
+
+ // If the shift is NUW/NSW, then it does demand the high bits.
+ ShlOperator *IOp = cast<ShlOperator>(I);
+ if (IOp->hasNoSignedWrap())
+ DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+ else if (IOp->hasNoUnsignedWrap())
+ DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
KnownZero, KnownOne, Depth+1))
return I;
@@ -592,10 +600,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
case Instruction::LShr:
// For a logical shift right
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Unsigned shift right.
APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<LShrOperator>(I)->isExact())
+ DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
KnownZero, KnownOne, Depth+1))
return I;
@@ -627,14 +641,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I->getOperand(0);
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint32_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Signed shift right.
APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
// If any of the "high bits" are demanded, we should set the sign bit as
// demanded.
if (DemandedMask.countLeadingZeros() <= ShiftAmt)
- DemandedMaskIn.set(BitWidth-1);
+ DemandedMaskIn.setBit(BitWidth-1);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<AShrOperator>(I)->isExact())
+ DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
KnownZero, KnownOne, Depth+1))
return I;
@@ -793,10 +813,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
for (unsigned i = 0; i != VWidth; ++i)
if (!DemandedElts[i]) { // If not demanded, set to undef.
Elts.push_back(Undef);
- UndefElts.set(i);
+ UndefElts.setBit(i);
} else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef.
Elts.push_back(Undef);
- UndefElts.set(i);
+ UndefElts.setBit(i);
} else { // Otherwise, defined.
Elts.push_back(CV->getOperand(i));
}
@@ -879,13 +899,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Otherwise, the element inserted overwrites whatever was there, so the
// input demanded set is simpler than the output set.
APInt DemandedElts2 = DemandedElts;
- DemandedElts2.clear(IdxNo);
+ DemandedElts2.clearBit(IdxNo);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
UndefElts, Depth+1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
// The inserted element is defined.
- UndefElts.clear(IdxNo);
+ UndefElts.clearBit(IdxNo);
break;
}
case Instruction::ShuffleVector: {
@@ -900,9 +920,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
assert(MaskVal < LHSVWidth * 2 &&
"shufflevector mask index out of range!");
if (MaskVal < LHSVWidth)
- LeftDemanded.set(MaskVal);
+ LeftDemanded.setBit(MaskVal);
else
- RightDemanded.set(MaskVal - LHSVWidth);
+ RightDemanded.setBit(MaskVal - LHSVWidth);
}
}
}
@@ -921,16 +941,16 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
for (unsigned i = 0; i < VWidth; i++) {
unsigned MaskVal = Shuffle->getMaskValue(i);
if (MaskVal == -1u) {
- UndefElts.set(i);
+ UndefElts.setBit(i);
} else if (MaskVal < LHSVWidth) {
if (UndefElts4[MaskVal]) {
NewUndefElts = true;
- UndefElts.set(i);
+ UndefElts.setBit(i);
}
} else {
if (UndefElts3[MaskVal - LHSVWidth]) {
NewUndefElts = true;
- UndefElts.set(i);
+ UndefElts.setBit(i);
}
}
}
@@ -973,7 +993,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Ratio = VWidth/InVWidth;
for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
if (DemandedElts[OutIdx])
- InputDemandedElts.set(OutIdx/Ratio);
+ InputDemandedElts.setBit(OutIdx/Ratio);
}
} else {
// Untested so far.
@@ -985,7 +1005,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Ratio = InVWidth/VWidth;
for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
if (DemandedElts[InIdx/Ratio])
- InputDemandedElts.set(InIdx);
+ InputDemandedElts.setBit(InIdx);
}
// div/rem demand all inputs, because they don't want divide by zero.
@@ -1004,7 +1024,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// undef.
for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
if (UndefElts2[OutIdx/Ratio])
- UndefElts.set(OutIdx);
+ UndefElts.setBit(OutIdx);
} else if (VWidth < InVWidth) {
llvm_unreachable("Unimp");
// If there are more elements in the source than there are in the result,
@@ -1013,7 +1033,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
if (!UndefElts2[InIdx]) // Not undef?
- UndefElts.clear(InIdx/Ratio); // Clear undef bit.
+ UndefElts.clearBit(InIdx/Ratio); // Clear undef bit.
}
break;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index a58124d..5caa12d 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
/// is to leave as a vector operation.
static bool CheapToScalarize(Value *V, bool isConstant) {
- if (isa<ConstantAggregateZero>(V))
+ if (isa<ConstantAggregateZero>(V))
return true;
if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
if (isConstant) return true;
@@ -31,7 +31,7 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
}
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
// Insert element gets simplified to the inserted element or is deleted if
 // this is a constant idx extract element and it's a constant idx insertelt.
if (I->getOpcode() == Instruction::InsertElement && isConstant &&
@@ -49,26 +49,24 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
(CheapToScalarize(CI->getOperand(0), isConstant) ||
CheapToScalarize(CI->getOperand(1), isConstant)))
return true;
-
+
return false;
}
-/// Read and decode a shufflevector mask.
-///
-/// It turns undef elements into values that are larger than the number of
-/// elements in the input.
-static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
+/// getShuffleMask - Read and decode a shufflevector mask.
+/// Turn undef elements into negative values.
+static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) {
unsigned NElts = SVI->getType()->getNumElements();
if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
- return std::vector<unsigned>(NElts, 0);
+ return std::vector<int>(NElts, 0);
if (isa<UndefValue>(SVI->getOperand(2)))
- return std::vector<unsigned>(NElts, 2*NElts);
-
- std::vector<unsigned> Result;
+ return std::vector<int>(NElts, -1);
+
+ std::vector<int> Result;
const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
if (isa<UndefValue>(*i))
- Result.push_back(NElts*2); // undef -> 8
+ Result.push_back(-1); // undef
else
Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
return Result;
@@ -83,42 +81,41 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
unsigned Width = PTy->getNumElements();
if (EltNo >= Width) // Out of range access.
return UndefValue::get(PTy->getElementType());
-
+
if (isa<UndefValue>(V))
return UndefValue::get(PTy->getElementType());
if (isa<ConstantAggregateZero>(V))
return Constant::getNullValue(PTy->getElementType());
if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
return CP->getOperand(EltNo);
-
+
if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
// If this is an insert to a variable element, we don't know what it is.
- if (!isa<ConstantInt>(III->getOperand(2)))
+ if (!isa<ConstantInt>(III->getOperand(2)))
return 0;
unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
-
+
// If this is an insert to the element we are looking for, return the
// inserted value.
- if (EltNo == IIElt)
+ if (EltNo == IIElt)
return III->getOperand(1);
-
+
// Otherwise, the insertelement doesn't modify the value, recurse on its
// vector input.
return FindScalarElement(III->getOperand(0), EltNo);
}
-
+
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
unsigned LHSWidth =
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
- unsigned InEl = getShuffleMask(SVI)[EltNo];
- if (InEl < LHSWidth)
- return FindScalarElement(SVI->getOperand(0), InEl);
- else if (InEl < LHSWidth*2)
- return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
- else
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+ int InEl = getShuffleMask(SVI)[EltNo];
+ if (InEl < 0)
return UndefValue::get(PTy->getElementType());
+ if (InEl < (int)LHSWidth)
+ return FindScalarElement(SVI->getOperand(0), InEl);
+ return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
}
-
+
// Otherwise, we don't know.
return 0;
}
@@ -127,11 +124,11 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If vector val is undef, replace extract with scalar undef.
if (isa<UndefValue>(EI.getOperand(0)))
return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-
+
// If vector val is constant 0, replace extract with scalar 0.
if (isa<ConstantAggregateZero>(EI.getOperand(0)))
return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
-
+
if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
// If vector val is constant with all elements the same, replace EI with
// that element. When the elements are not identical, we cannot replace yet
@@ -139,53 +136,53 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
Constant *op0 = C->getOperand(0);
for (unsigned i = 1; i != C->getNumOperands(); ++i)
if (C->getOperand(i) != op0) {
- op0 = 0;
+ op0 = 0;
break;
}
if (op0)
return ReplaceInstUsesWith(EI, op0);
}
-
+
// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
unsigned IndexVal = IdxC->getZExtValue();
unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
-
+
// If this is extracting an invalid index, turn this into undef, to avoid
// crashing the code below.
if (IndexVal >= VectorWidth)
return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-
+
// This instruction only demands the single element from the input vector.
// If the input vector has a single use, simplify it based on this use
// property.
if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
APInt UndefElts(VectorWidth, 0);
APInt DemandedMask(VectorWidth, 0);
- DemandedMask.set(IndexVal);
+ DemandedMask.setBit(IndexVal);
if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
DemandedMask, UndefElts)) {
EI.setOperand(0, V);
return &EI;
}
}
-
+
if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
return ReplaceInstUsesWith(EI, Elt);
-
+
    // If this extractelement is directly using a bitcast from a vector of
// the same number of elements, see if we can find the source element from
// it. In this case, we will end up needing to bitcast the scalars.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
- if (const VectorType *VT =
+ if (const VectorType *VT =
dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
if (VT->getNumElements() == VectorWidth)
if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
return new BitCastInst(Elt, EI.getType());
}
}
-
+
if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
// Push extractelement into predecessor operation if legal and
// profitable to do so
@@ -193,11 +190,11 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
if (I->hasOneUse() &&
CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
Value *newEI0 =
- Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
- EI.getName()+".lhs");
+ Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+ EI.getName()+".lhs");
Value *newEI1 =
- Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
- EI.getName()+".rhs");
+ Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+ EI.getName()+".rhs");
return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
}
} else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
@@ -215,21 +212,22 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If this is extracting an element from a shufflevector, figure out where
// it came from and extract from the appropriate input element instead.
if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
- unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
+ int SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
Value *Src;
unsigned LHSWidth =
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
-
- if (SrcIdx < LHSWidth)
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+
+ if (SrcIdx < 0)
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+ if (SrcIdx < (int)LHSWidth)
Src = SVI->getOperand(0);
- else if (SrcIdx < LHSWidth*2) {
+ else {
SrcIdx -= LHSWidth;
Src = SVI->getOperand(1);
- } else {
- return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
}
+ const Type *Int32Ty = Type::getInt32Ty(EI.getContext());
return ExtractElementInst::Create(Src,
- ConstantInt::get(Type::getInt32Ty(EI.getContext()),
+ ConstantInt::get(Int32Ty,
SrcIdx, false));
}
}
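// For example (illustrative, not from the patch), with 4-element operands:
//   %S = shufflevector <4 x i32> %A, <4 x i32> %B,
//                      <4 x i32> <i32 5, i32 0, i32 7, i32 2>
//   %E = extractelement <4 x i32> %S, i32 0
// decodes SrcIdx = 5 >= LHSWidth = 4, so the extract is rewritten as
//   %E = extractelement <4 x i32> %B, i32 1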
@@ -239,42 +237,42 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
}
/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
-/// elements from either LHS or RHS, return the shuffle mask and true.
+/// elements from either LHS or RHS, return the shuffle mask and true.
/// Otherwise, return false.
static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
std::vector<Constant*> &Mask) {
assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
-
+
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
return true;
}
-
+
if (V == LHS) {
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
return true;
}
-
+
if (V == RHS) {
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
i+NumElts));
return true;
}
-
+
if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
// If this is an insert of an extract from some other vector, include it.
Value *VecOp = IEI->getOperand(0);
Value *ScalarOp = IEI->getOperand(1);
Value *IdxOp = IEI->getOperand(2);
-
+
if (!isa<ConstantInt>(IdxOp))
return false;
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
+
if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
      // Okay, we can handle this if the vector we are inserting into is
// transitively ok.
@@ -282,13 +280,13 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
// If so, update the mask to reflect the inserted undef.
Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
return true;
- }
+ }
} else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
if (isa<ConstantInt>(EI->getOperand(1)) &&
EI->getOperand(0)->getType() == V->getType()) {
unsigned ExtractedIdx =
cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
-
+
// This must be extracting from either LHS or RHS.
if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
        // Okay, we can handle this if the vector we are inserting into is
@@ -296,15 +294,14 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
// If so, update the mask to reflect the inserted value.
if (EI->getOperand(0) == LHS) {
- Mask[InsertedIdx % NumElts] =
+ Mask[InsertedIdx % NumElts] =
ConstantInt::get(Type::getInt32Ty(V->getContext()),
ExtractedIdx);
} else {
assert(EI->getOperand(0) == RHS);
- Mask[InsertedIdx % NumElts] =
+ Mask[InsertedIdx % NumElts] =
ConstantInt::get(Type::getInt32Ty(V->getContext()),
ExtractedIdx+NumElts);
-
}
return true;
}
@@ -313,7 +310,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
}
}
// TODO: Handle shufflevector here!
-
+
return false;
}
@@ -322,11 +319,11 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
/// that computes V and the LHS value of the shuffle.
static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
Value *&RHS) {
- assert(V->getType()->isVectorTy() &&
+ assert(V->getType()->isVectorTy() &&
(RHS == 0 || V->getType() == RHS->getType()) &&
"Invalid shuffle!");
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
-
+
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
return V;
@@ -338,25 +335,25 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
Value *VecOp = IEI->getOperand(0);
Value *ScalarOp = IEI->getOperand(1);
Value *IdxOp = IEI->getOperand(2);
-
+
if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
EI->getOperand(0)->getType() == V->getType()) {
unsigned ExtractedIdx =
- cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
+
// Either the extracted from or inserted into vector must be RHSVec,
// otherwise we'd end up with a shuffle of three inputs.
if (EI->getOperand(0) == RHS || RHS == 0) {
RHS = EI->getOperand(0);
Value *V = CollectShuffleElements(VecOp, Mask, RHS);
- Mask[InsertedIdx % NumElts] =
- ConstantInt::get(Type::getInt32Ty(V->getContext()),
- NumElts+ExtractedIdx);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ NumElts+ExtractedIdx);
return V;
}
-
+
if (VecOp == RHS) {
Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
// Everything but the extracted element is replaced with the RHS.
@@ -367,7 +364,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
}
return V;
}
-
+
// If this insertelement is a chain that comes from exactly these two
// vectors, return the vector and the effective shuffle.
if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
@@ -376,7 +373,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
}
}
// TODO: Handle shufflevector here!
-
+
// Otherwise, can't do anything fancy. Return an identity vector.
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
@@ -387,32 +384,32 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
Value *VecOp = IE.getOperand(0);
Value *ScalarOp = IE.getOperand(1);
Value *IdxOp = IE.getOperand(2);
-
+
// Inserting an undef or into an undefined place, remove this.
if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
ReplaceInstUsesWith(IE, VecOp);
-
- // If the inserted element was extracted from some other vector, and if the
+
+ // If the inserted element was extracted from some other vector, and if the
// indexes are constant, try to turn this into a shufflevector operation.
if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
EI->getOperand(0)->getType() == IE.getType()) {
unsigned NumVectorElts = IE.getType()->getNumElements();
unsigned ExtractedIdx =
- cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
+
if (ExtractedIdx >= NumVectorElts) // Out of range extract.
return ReplaceInstUsesWith(IE, VecOp);
-
+
if (InsertedIdx >= NumVectorElts) // Out of range insert.
return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
-
+
// If we are extracting a value from a vector, then inserting it right
// back into the same place, just use the input vector.
if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
- return ReplaceInstUsesWith(IE, VecOp);
-
+ return ReplaceInstUsesWith(IE, VecOp);
+
// If this insertelement isn't used by some other insertelement, turn it
// (and any insertelements it points to), into one big shuffle.
if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
@@ -421,18 +418,20 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
if (RHS == 0) RHS = UndefValue::get(LHS->getType());
// We now have a shuffle of LHS, RHS, Mask.
- return new ShuffleVectorInst(LHS, RHS,
- ConstantVector::get(Mask));
+ return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask));
}
}
}
-
+
unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
- if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts))
+ if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
+ if (V != &IE)
+ return ReplaceInstUsesWith(IE, V);
return &IE;
-
+ }
+
return 0;
}
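// For example (illustrative, not from the patch), a chain that inserts an
// element extracted from another vector collapses to a single shuffle:
//   %t = extractelement <4 x float> %B, i32 0
//   %r = insertelement <4 x float> %A, float %t, i32 2
// becomes
//   %r = shufflevector <4 x float> %A, <4 x float> %B,
//                      <4 x i32> <i32 0, i32 1, i32 4, i32 3>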
@@ -440,27 +439,29 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
- std::vector<unsigned> Mask = getShuffleMask(&SVI);
-
+ std::vector<int> Mask = getShuffleMask(&SVI);
+
bool MadeChange = false;
-
+
// Undefined shuffle mask -> undefined value.
if (isa<UndefValue>(SVI.getOperand(2)))
return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
-
+
unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
-
+
if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
return 0;
-
+
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
- if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ if (V != &SVI)
+ return ReplaceInstUsesWith(SVI, V);
LHS = SVI.getOperand(0);
RHS = SVI.getOperand(1);
MadeChange = true;
}
-
+
// Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
// Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
if (LHS == RHS || isa<UndefValue>(LHS)) {
@@ -468,16 +469,16 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// shuffle(undef,undef,mask) -> undef.
return ReplaceInstUsesWith(SVI, LHS);
}
-
+
// Remap any references to RHS to use LHS.
std::vector<Constant*> Elts;
for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] >= 2*e)
+ if (Mask[i] < 0)
Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
else {
- if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
- (Mask[i] < e && isa<UndefValue>(LHS))) {
- Mask[i] = 2*e; // Turn into undef.
+ if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
+ (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
+ Mask[i] = -1; // Turn into undef.
Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
} else {
Mask[i] = Mask[i] % e; // Force to LHS.
@@ -493,59 +494,65 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
RHS = SVI.getOperand(1);
MadeChange = true;
}
-
+
  // Analyze the shuffle: are the LHS or RHS identity shuffles?
bool isLHSID = true, isRHSID = true;
-
+
for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] >= e*2) continue; // Ignore undef values.
+ if (Mask[i] < 0) continue; // Ignore undef values.
// Is this an identity shuffle of the LHS value?
- isLHSID &= (Mask[i] == i);
-
+ isLHSID &= (Mask[i] == (int)i);
+
// Is this an identity shuffle of the RHS value?
isRHSID &= (Mask[i]-e == i);
}
-
+
// Eliminate identity shuffles.
if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
-
+
// If the LHS is a shufflevector itself, see if we can combine it with this
// one without producing an unusual shuffle. Here we are really conservative:
// we are absolutely afraid of producing a shuffle mask not in the input
// program, because the code gen may not be smart enough to turn a merged
// shuffle into two specific shuffles: it may produce worse code. As such,
- // we only merge two shuffles if the result is one of the two input shuffle
- // masks. In this case, merging the shuffles just removes one instruction,
- // which we know is safe. This is good for things like turning:
- // (splat(splat)) -> splat.
+ // we only merge two shuffles if the result is either a splat or one of the
+ // two input shuffle masks. In this case, merging the shuffles just removes
+ // one instruction, which we know is safe. This is good for things like
+ // turning: (splat(splat)) -> splat.
if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
if (isa<UndefValue>(RHS)) {
- std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
-
+ std::vector<int> LHSMask = getShuffleMask(LHSSVI);
+
if (LHSMask.size() == Mask.size()) {
- std::vector<unsigned> NewMask;
- for (unsigned i = 0, e = Mask.size(); i != e; ++i)
- if (Mask[i] >= e)
- NewMask.push_back(2*e);
+ std::vector<int> NewMask;
+ bool isSplat = true;
+ int SplatElt = -1; // undef
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ int MaskElt;
+ if (Mask[i] < 0 || Mask[i] >= (int)e)
+ MaskElt = -1; // undef
else
- NewMask.push_back(LHSMask[Mask[i]]);
-
+ MaskElt = LHSMask[Mask[i]];
+ // Check if this could still be a splat.
+ if (MaskElt >= 0) {
+ if (SplatElt >=0 && SplatElt != MaskElt)
+ isSplat = false;
+ SplatElt = MaskElt;
+ }
+ NewMask.push_back(MaskElt);
+ }
+
// If the result mask is equal to the src shuffle or this
// shuffle mask, do the replacement.
- if (NewMask == LHSMask || NewMask == Mask) {
- unsigned LHSInNElts =
- cast<VectorType>(LHSSVI->getOperand(0)->getType())->
- getNumElements();
+ if (isSplat || NewMask == LHSMask || NewMask == Mask) {
std::vector<Constant*> Elts;
+ const Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
- if (NewMask[i] >= LHSInNElts*2) {
- Elts.push_back(UndefValue::get(
- Type::getInt32Ty(SVI.getContext())));
+ if (NewMask[i] < 0) {
+ Elts.push_back(UndefValue::get(Int32Ty));
} else {
- Elts.push_back(ConstantInt::get(
- Type::getInt32Ty(SVI.getContext()),
- NewMask[i]));
+ Elts.push_back(ConstantInt::get(Int32Ty, NewMask[i]));
}
}
return new ShuffleVectorInst(LHSSVI->getOperand(0),
@@ -555,7 +562,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
}
}
}
-
+
return MadeChange ? &SVI : 0;
}
-
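// For example (illustrative, not from the patch), a shuffle of a shuffle
// merges when the combined mask is a splat, even if it matches neither
// input mask:
//   %s1 = shufflevector <4 x i32> %x, <4 x i32> undef,
//                       <4 x i32> <i32 0, i32 1, i32 0, i32 1>
//   %s2 = shufflevector <4 x i32> %s1, <4 x i32> undef,
//                       <4 x i32> <i32 0, i32 2, i32 0, i32 2>
// Here NewMask = <0,0,0,0>, a splat, so the pair is replaced by
//   %s2 = shufflevector <4 x i32> %x, <4 x i32> undef,
//                       <4 x i32> zeroinitializer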
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index e46c679..37123d0 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -48,6 +48,7 @@
#include "llvm/Support/PatternMatch.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm-c/Initialization.h"
#include <algorithm>
#include <climits>
using namespace llvm;
@@ -57,11 +58,22 @@ STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");
+STATISTIC(NumExpand, "Number of expansions");
+STATISTIC(NumFactor , "Number of factorizations");
+STATISTIC(NumReassoc , "Number of reassociations");
+// Initialization Routines
+void llvm::initializeInstCombine(PassRegistry &Registry) {
+ initializeInstCombinerPass(Registry);
+}
+
+void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
+ initializeInstCombine(*unwrap(R));
+}
char InstCombiner::ID = 0;
INITIALIZE_PASS(InstCombiner, "instcombine",
- "Combine redundant instructions", false, false);
+ "Combine redundant instructions", false, false)
void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreservedID(LCSSAID);
@@ -97,53 +109,326 @@ bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
}
-// SimplifyCommutative - This performs a few simplifications for commutative
-// operators:
+/// SimplifyAssociativeOrCommutative - This performs a few simplifications for
+/// operators which are associative or commutative:
+//
+// Commutative operators:
//
// 1. Order operands such that they are listed from right (least complex) to
// left (most complex). This puts constants before unary operators before
// binary operators.
//
-// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2))
-// 3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
+// Associative operators:
+//
+// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+//
+// Associative and commutative operators:
+//
+// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+// if C1 and C2 are constants.
//
-bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
+bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
+ Instruction::BinaryOps Opcode = I.getOpcode();
bool Changed = false;
- if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1)))
- Changed = !I.swapOperands();
- if (!I.isAssociative()) return Changed;
-
- Instruction::BinaryOps Opcode = I.getOpcode();
- if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
- if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
- if (isa<Constant>(I.getOperand(1))) {
- Constant *Folded = ConstantExpr::get(I.getOpcode(),
- cast<Constant>(I.getOperand(1)),
- cast<Constant>(Op->getOperand(1)));
- I.setOperand(0, Op->getOperand(0));
- I.setOperand(1, Folded);
- return true;
+ do {
+ // Order operands such that they are listed from right (least complex) to
+ // left (most complex). This puts constants before unary operators before
+ // binary operators.
+ if (I.isCommutative() && getComplexity(I.getOperand(0)) <
+ getComplexity(I.getOperand(1)))
+ Changed = !I.swapOperands();
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
+
+ if (I.isAssociative()) {
+ // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = I.getOperand(1);
+
+ // Does "B op C" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, C, TD)) {
+ // It simplifies to V. Form "A op V".
+ I.setOperand(0, A);
+ I.setOperand(1, V);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
}
-
- if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1)))
- if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) &&
- Op->hasOneUse() && Op1->hasOneUse()) {
- Constant *C1 = cast<Constant>(Op->getOperand(1));
- Constant *C2 = cast<Constant>(Op1->getOperand(1));
-
- // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
- Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
- Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
- Op1->getOperand(0),
- Op1->getName(), &I);
- Worklist.Add(New);
- I.setOperand(0, New);
- I.setOperand(1, Folded);
- return true;
+
+ // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = I.getOperand(0);
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "A op B" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, B, TD)) {
+ // It simplifies to V. Form "V op C".
+ I.setOperand(0, V);
+ I.setOperand(1, C);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
}
+ }
}
- return Changed;
+
+ if (I.isAssociative() && I.isCommutative()) {
+ // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = I.getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+ // It simplifies to V. Form "V op B".
+ I.setOperand(0, V);
+ I.setOperand(1, B);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = I.getOperand(0);
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+ // It simplifies to V. Form "B op V".
+ I.setOperand(0, B);
+ I.setOperand(1, V);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+ // if C1 and C2 are constants.
+ if (Op0 && Op1 &&
+ Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
+ isa<Constant>(Op0->getOperand(1)) &&
+ isa<Constant>(Op1->getOperand(1)) &&
+ Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *A = Op0->getOperand(0);
+ Constant *C1 = cast<Constant>(Op0->getOperand(1));
+ Value *B = Op1->getOperand(0);
+ Constant *C2 = cast<Constant>(Op1->getOperand(1));
+
+ Constant *Folded = ConstantExpr::get(Opcode, C1, C2);
+ Instruction *New = BinaryOperator::Create(Opcode, A, B, Op1->getName(),
+ &I);
+ Worklist.Add(New);
+ I.setOperand(0, New);
+ I.setOperand(1, Folded);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ continue;
+ }
+ }
+
+ // No further simplifications.
+ return Changed;
+ } while (1);
+}
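// For example (illustrative, not from the patch), transform 2 fires on
//   %t = add i32 %x, 1
//   %r = add i32 %t, 2        ; "(A op B) op C" with B = 1, C = 2
// because "B op C" constant-folds to 3 via SimplifyBinOp, rewriting %r to
//   %r = add i32 %x, 3
// with any nsw/nuw flags conservatively cleared.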
+
+/// LeftDistributesOverRight - Whether "X LOp (Y ROp Z)" is always equal to
+/// "(X LOp Y) ROp (X LOp Z)".
+static bool LeftDistributesOverRight(Instruction::BinaryOps LOp,
+ Instruction::BinaryOps ROp) {
+ switch (LOp) {
+ default:
+ return false;
+
+ case Instruction::And:
+ // And distributes over Or and Xor.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::Or:
+ case Instruction::Xor:
+ return true;
+ }
+
+ case Instruction::Mul:
+ // Multiplication distributes over addition and subtraction.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::Add:
+ case Instruction::Sub:
+ return true;
+ }
+
+ case Instruction::Or:
+ // Or distributes over And.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::And:
+ return true;
+ }
+ }
+}
+
+/// RightDistributesOverLeft - Whether "(X LOp Y) ROp Z" is always equal to
+/// "(X ROp Z) LOp (Y ROp Z)".
+static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
+ Instruction::BinaryOps ROp) {
+ if (Instruction::isCommutative(ROp))
+ return LeftDistributesOverRight(ROp, LOp);
+ // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
+ // but this requires knowing that the addition does not overflow and other
+ // such subtleties.
+ return false;
+}
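// Concretely (illustrative): LeftDistributesOverRight(And, Or) is true
// because "X & (Y | Z) == (X & Y) | (X & Z)" always holds; and since And is
// commutative, RightDistributesOverLeft(Or, And) reduces to the same fact:
// "(X | Y) & Z == (X & Z) | (Y & Z)".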
+
+/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
+/// which some other binary operation distributes over either by factorizing
+/// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
+/// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
+/// a win). Returns the simplified value, or null if it didn't simplify.
+Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+ Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op
+
+ // Factorization.
+ if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) {
+ // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
+ // a common term.
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
+ Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
+ Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+
+ // Does "X op' Y" always equal "Y op' X"?
+ bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
+
+ // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
+ if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode))
+ // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+ // commutative case, "(A op' B) op (C op' A)"?
+ if (A == C || (InnerCommutative && A == D)) {
+ if (A != C)
+ std::swap(C, D);
+ // Consider forming "A op' (B op D)".
+ // If "B op D" simplifies then it can be formed with no cost.
+ Value *V = SimplifyBinOp(TopLevelOpcode, B, D, TD);
+ // If "B op D" doesn't simplify then only go on if both of the existing
+ // operations "A op' B" and "C op' D" will be zapped as no longer used.
+ if (!V && Op0->hasOneUse() && Op1->hasOneUse())
+ V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName());
+ if (V) {
+ ++NumFactor;
+ V = Builder->CreateBinOp(InnerOpcode, A, V);
+ V->takeName(&I);
+ return V;
+ }
+ }
+
+ // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
+ if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
+ // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+ // commutative case, "(A op' B) op (B op' D)"?
+ if (B == D || (InnerCommutative && B == C)) {
+ if (B != D)
+ std::swap(C, D);
+ // Consider forming "(A op C) op' B".
+ // If "A op C" simplifies then it can be formed with no cost.
+ Value *V = SimplifyBinOp(TopLevelOpcode, A, C, TD);
+ // If "A op C" doesn't simplify then only go on if both of the existing
+ // operations "A op' B" and "C op' D" will be zapped as no longer used.
+ if (!V && Op0->hasOneUse() && Op1->hasOneUse())
+ V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName());
+ if (V) {
+ ++NumFactor;
+ V = Builder->CreateBinOp(InnerOpcode, V, B);
+ V->takeName(&I);
+ return V;
+ }
+ }
+ }
+
+ // Expansion.
+ if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
+ // The instruction has the form "(A op' B) op C". See if expanding it out
+ // to "(A op C) op' (B op C)" results in simplifications.
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+ Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+
+ // Do "A op C" and "B op C" both simplify?
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, TD))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, TD)) {
+ // They do! Return "L op' R".
+ ++NumExpand;
+ // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+ if ((L == A && R == B) ||
+ (Instruction::isCommutative(InnerOpcode) && L == B && R == A))
+ return Op0;
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+ return V;
+ // Otherwise, create a new instruction.
+ C = Builder->CreateBinOp(InnerOpcode, L, R);
+ C->takeName(&I);
+ return C;
+ }
+ }
+
+ if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
+ // The instruction has the form "A op (B op' C)". See if expanding it out
+ // to "(A op B) op' (A op C)" results in simplifications.
+ Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+ Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
+
+ // Do "A op B" and "A op C" both simplify?
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, TD))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, TD)) {
+ // They do! Return "L op' R".
+ ++NumExpand;
+ // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+ if ((L == B && R == C) ||
+ (Instruction::isCommutative(InnerOpcode) && L == C && R == B))
+ return Op1;
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+ return V;
+ // Otherwise, create a new instruction.
+ A = Builder->CreateBinOp(InnerOpcode, L, R);
+ A->takeName(&I);
+ return A;
+ }
+ }
+
+ return 0;
}
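// Two concrete cases (illustrative, not from the patch):
//   Factorization: "(%x * 4) + (%x * 2)" has A == C == %x, and "4 + 2"
//   simplifies, so the whole sum is rebuilt as "%x * 6".
//   Expansion: "(%x ^ -1) & %x" expands to "(%x & %x) ^ (-1 & %x)"; both
//   halves simplify to %x, and "%x ^ %x" then simplifies to 0.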
// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
@@ -185,8 +470,9 @@ Value *InstCombiner::dyn_castFNegVal(Value *V) const {
static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
InstCombiner *IC) {
- if (CastInst *CI = dyn_cast<CastInst>(&I))
+ if (CastInst *CI = dyn_cast<CastInst>(&I)) {
return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
+ }
// Figure out if the constant is the left or the right argument.
bool ConstIsRHS = isa<Constant>(I.getOperand(1));
@@ -228,11 +514,24 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
// Bool selects with constant operands can be folded to logical ops.
if (SI->getType()->isIntegerTy(1)) return 0;
+ // If it's a bitcast involving vectors, make sure it has the same number of
+ // elements on both sides.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) {
+ const VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
+ const VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
+
+ // Verify that either both or neither are vectors.
+ if ((SrcTy == NULL) != (DestTy == NULL)) return 0;
+ // If vectors, verify that they have the same number of elements.
+ if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
+ return 0;
+ }
+
Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
- return SelectInst::Create(SI->getCondition(), SelectTrueVal,
- SelectFalseVal);
+ return SelectInst::Create(SI->getCondition(),
+ SelectTrueVal, SelectFalseVal);
}
return 0;
}
@@ -242,20 +541,25 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
/// has a PHI node as operand #0, see if we can fold the instruction into the
/// PHI (which is only possible if all operands to the PHI are constants).
///
-/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
-/// that would normally be unprofitable because they strongly encourage jump
-/// threading.
-Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
- bool AllowAggressive) {
- AllowAggressive = false;
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
PHINode *PN = cast<PHINode>(I.getOperand(0));
unsigned NumPHIValues = PN->getNumIncomingValues();
- if (NumPHIValues == 0 ||
- // We normally only transform phis with a single use, unless we're trying
- // hard to make jump threading happen.
- (!PN->hasOneUse() && !AllowAggressive))
+ if (NumPHIValues == 0)
return 0;
+ // We normally only transform phis with a single use. However, if a PHI has
+ // multiple uses and they are all the same operation, we can fold *all* of the
+ // uses into the PHI.
+ if (!PN->hasOneUse()) {
+ // Walk the use list for the instruction, comparing them to I.
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User != &I && !I.isIdenticalTo(User))
+ return 0;
+ }
+ // Otherwise, we can replace *all* users with the new PHI we form.
+ }
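// Illustrative sketch (not from the patch): if %p = phi i32 [1, %bb1],
// [2, %bb2] has two identical users
//   %a = add i32 %p, 4
//   %b = add i32 %p, 4
// then a single new phi %n = phi i32 [5, %bb1], [6, %bb2] is formed and
// both %a and %b are replaced by %n.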
// Check to see if all of the operands of the PHI are simple constants
// (constantint/constantfp/undef). If there is one non-constant value,
@@ -263,24 +567,34 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
// bail out. We don't do arbitrary constant expressions here because moving
// their computation can be expensive without a cost model.
BasicBlock *NonConstBB = 0;
- for (unsigned i = 0; i != NumPHIValues; ++i)
- if (!isa<Constant>(PN->getIncomingValue(i)) ||
- isa<ConstantExpr>(PN->getIncomingValue(i))) {
- if (NonConstBB) return 0; // More than one non-const value.
- if (isa<PHINode>(PN->getIncomingValue(i))) return 0; // Itself a phi.
- NonConstBB = PN->getIncomingBlock(i);
-
- // If the incoming non-constant value is in I's block, we have an infinite
- // loop.
- if (NonConstBB == I.getParent())
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal))
+ continue;
+
+ if (isa<PHINode>(InVal)) return 0; // Itself a phi.
+ if (NonConstBB) return 0; // More than one non-const value.
+
+ NonConstBB = PN->getIncomingBlock(i);
+
+ // If the InVal is an invoke at the end of the pred block, then we can't
+ // insert a computation after it without breaking the edge.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+ if (II->getParent() == NonConstBB)
return 0;
- }
+
+ // If the incoming non-constant value is in I's block, we will remove one
+ // instruction, but insert another equivalent one, leading to infinite
+ // instcombine.
+ if (NonConstBB == I.getParent())
+ return 0;
+ }
// If there is exactly one non-constant value, we can insert a copy of the
// operation in that block. However, if this is a critical edge, we would be
  // inserting the computation on some other paths (e.g. inside a loop). Only
// do this if the pred block is unconditionally branching into the phi block.
- if (NonConstBB != 0 && !AllowAggressive) {
+ if (NonConstBB != 0) {
BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
if (!BI || !BI->isUnconditional()) return 0;
}
@@ -290,7 +604,12 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
NewPN->reserveOperandSpace(PN->getNumOperands()/2);
InsertNewInstBefore(NewPN, *PN);
NewPN->takeName(PN);
-
+
+ // If we are going to have to insert a new computation, do so right before the
+  // predecessor's terminator.
+ if (NonConstBB)
+ Builder->SetInsertPoint(NonConstBB->getTerminator());
+
// Next, add all of the operands to the PHI.
if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
// We only currently try to fold the condition of a select when it is a phi,
@@ -303,42 +622,36 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
Value *InV = 0;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
- } else {
- assert(PN->getIncomingBlock(i) == NonConstBB);
- InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
- FalseVInPred,
- "phitmp", NonConstBB->getTerminator());
- Worklist.Add(cast<Instruction>(InV));
- }
+ else
+ InV = Builder->CreateSelect(PN->getIncomingValue(i),
+ TrueVInPred, FalseVInPred, "phitmp");
NewPN->addIncoming(InV, ThisBB);
}
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
+ Constant *C = cast<Constant>(I.getOperand(1));
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
+ else if (isa<ICmpInst>(CI))
+ InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phitmp");
+ else
+ InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phitmp");
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
} else if (I.getNumOperands() == 2) {
Constant *C = cast<Constant>(I.getOperand(1));
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InV = 0;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
- if (CmpInst *CI = dyn_cast<CmpInst>(&I))
- InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
- else
- InV = ConstantExpr::get(I.getOpcode(), InC, C);
- } else {
- assert(PN->getIncomingBlock(i) == NonConstBB);
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
- InV = BinaryOperator::Create(BO->getOpcode(),
- PN->getIncomingValue(i), C, "phitmp",
- NonConstBB->getTerminator());
- else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
- InV = CmpInst::Create(CI->getOpcode(),
- CI->getPredicate(),
- PN->getIncomingValue(i), C, "phitmp",
- NonConstBB->getTerminator());
- else
- llvm_unreachable("Unknown binop!");
-
- Worklist.Add(cast<Instruction>(InV));
- }
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::get(I.getOpcode(), InC, C);
+ else
+ InV = Builder->CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
+ PN->getIncomingValue(i), C, "phitmp");
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
} else {
@@ -346,18 +659,22 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
const Type *RetTy = CI->getType();
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InV;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
- } else {
- assert(PN->getIncomingBlock(i) == NonConstBB);
- InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
- I.getType(), "phitmp",
- NonConstBB->getTerminator());
- Worklist.Add(cast<Instruction>(InV));
- }
+ else
+ InV = Builder->CreateCast(CI->getOpcode(),
+ PN->getIncomingValue(i), I.getType(), "phitmp");
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
}
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ if (User == &I) continue;
+ ReplaceInstUsesWith(*User, NewPN);
+ EraseInstFromFunction(*User);
+ }
return ReplaceInstUsesWith(I, NewPN);
}
@@ -432,28 +749,35 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Value *PtrOp = GEP.getOperand(0);
- if (isa<UndefValue>(GEP.getOperand(0)))
- return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
-
- // Eliminate unneeded casts for indices.
+ // Eliminate unneeded casts for indices, and replace indices which displace
+ // by multiples of a zero size type with zero.
if (TD) {
bool MadeChange = false;
- unsigned PtrSize = TD->getPointerSizeInBits();
-
+ const Type *IntPtrTy = TD->getIntPtrType(GEP.getContext());
+
gep_type_iterator GTI = gep_type_begin(GEP);
for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
I != E; ++I, ++GTI) {
- if (!isa<SequentialType>(*GTI)) continue;
-
- // If we are using a wider index than needed for this platform, shrink it
- // to what we need. If narrower, sign-extend it to what we need. This
- // explicit cast can make subsequent optimizations more obvious.
- unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
- if (OpBits == PtrSize)
- continue;
-
- *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
- MadeChange = true;
+ // Skip indices into struct types.
+ const SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
+ if (!SeqTy) continue;
+
+ // If the element type has zero size then any index over it is equivalent
+ // to an index of zero, so replace it with zero if it is not zero already.
+ if (SeqTy->getElementType()->isSized() &&
+ TD->getTypeAllocSize(SeqTy->getElementType()) == 0)
+ if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
+ *I = Constant::getNullValue(IntPtrTy);
+ MadeChange = true;
+ }
+
+ if ((*I)->getType() != IntPtrTy) {
+ // If we are using a wider index than needed for this platform, shrink
+ // it to what we need. If narrower, sign-extend it to what we need.
+ // This explicit cast can make subsequent optimizations more obvious.
+ *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ MadeChange = true;
+ }
}
if (MadeChange) return &GEP;
}
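// For example (illustrative, not from the patch), with 64-bit pointers
//   %g = getelementptr i32* %p, i32 %i
// gets its index widened to the pointer size:
//   %idx = sext i32 %i to i64
//   %g = getelementptr i32* %p, i64 %idx
// and an index over a zero-sized element type, e.g.
//   %g = getelementptr {}* %q, i64 %n
// is replaced by a constant zero index, since it displaces by 0 bytes.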
@@ -940,6 +1264,14 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
EraseInstFromFunction(*II);
return BinaryOperator::CreateAdd(LHS, RHS);
}
+
+ // If the normal result of the add is dead, and the RHS is a constant,
+ // we can transform this into a range comparison.
+ // overflow = uadd a, -4 --> overflow = icmp ugt a, 3
+ if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1)))
+ return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0),
+ ConstantExpr::getNot(CI));
break;
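// Worked arithmetic for the transform above (illustrative): for 32-bit %a,
// "-4" is 0xFFFFFFFC, and %a + 0xFFFFFFFC wraps exactly when %a > 3; since
// ~(-4) == 3, the overflow bit is equivalent to "icmp ugt %a, 3".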
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
@@ -964,10 +1296,37 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
}
}
}
- // Can't simplify extracts from other values. Note that nested extracts are
- // already simplified implicitely by the above (extract ( extract (insert) )
+ if (LoadInst *L = dyn_cast<LoadInst>(Agg))
+ // If the (non-volatile) load only has one use, we can rewrite this to a
+ // load from a GEP. This reduces the size of the load.
+ // FIXME: If a load is used only by extractvalue instructions then this
+ // could be done regardless of having multiple uses.
+ if (!L->isVolatile() && L->hasOneUse()) {
+ // extractvalue has integer indices, getelementptr has Value*s. Convert.
+ SmallVector<Value*, 4> Indices;
+ // Prefix an i32 0 since we need the first element.
+ Indices.push_back(Builder->getInt32(0));
+ for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end();
+ I != E; ++I)
+ Indices.push_back(Builder->getInt32(*I));
+
+ // We need to insert these at the location of the old load, not at that of
+ // the extractvalue.
+ Builder->SetInsertPoint(L->getParent(), L);
+ Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(),
+ Indices.begin(), Indices.end());
+ // Returning the load directly will cause the main loop to insert it in
+ // the wrong spot, so use ReplaceInstUsesWith().
+ return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP));
+ }
+ // We could simplify extracts from other values. Note that nested extracts may
+ // already be simplified implicitly by the above: extract (extract (insert) )
// will be translated into extract ( insert ( extract ) ) first and then just
- // the value inserted, if appropriate).
+ // the value inserted, if appropriate. Similarly for extracts from single-use
+ // loads: extract (extract (load)) will be translated to extract (load (gep))
+ // and if again single-use then via load (gep (gep)) to load (gep).
+ // However, double extracts from e.g. function arguments or return values
+ // aren't handled yet.
return 0;
}
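// For example (illustrative, not from the patch), a single-use non-volatile
// aggregate load
//   %L = load {i32, i8}* %P
//   %E = extractvalue {i32, i8} %L, 1
// is rewritten, at the position of the old load, to
//   %G = getelementptr inbounds {i32, i8}* %P, i32 0, i32 1
//   %E = load i8* %G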
@@ -1023,10 +1382,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
bool MadeIRChange = false;
SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(BB);
-
- std::vector<Instruction*> InstrsForInstCombineWorklist;
- InstrsForInstCombineWorklist.reserve(128);
+ SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
do {
@@ -1231,6 +1588,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
DEBUG(errs() << "IC: Old = " << *I << '\n'
<< " New = " << *Result << '\n');
+ Result->setDebugLoc(I->getDebugLoc());
// Everything uses the new instruction now.
I->replaceAllUsesWith(Result);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
index a77d70c..1d31fcc 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -17,6 +17,7 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "insert-edge-profiling"
+
#include "ProfilingUtils.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
@@ -34,7 +35,9 @@ namespace {
bool runOnModule(Module &M);
public:
static char ID; // Pass identification, replacement for typeid
- EdgeProfiler() : ModulePass(ID) {}
+ EdgeProfiler() : ModulePass(ID) {
+ initializeEdgeProfilerPass(*PassRegistry::getPassRegistry());
+ }
virtual const char *getPassName() const {
return "Edge Profiler";
@@ -44,7 +47,7 @@ namespace {
char EdgeProfiler::ID = 0;
INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling",
- "Insert instrumentation for edge profiling", false, false);
+ "Insert instrumentation for edge profiling", false, false)
ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
@@ -98,7 +101,7 @@ bool EdgeProfiler::runOnModule(Module &M) {
// otherwise insert it in the successor block.
if (TI->getNumSuccessors() == 1) {
// Insert counter at the start of the block
- IncrementCounterInBlock(BB, i++, Counters);
+ IncrementCounterInBlock(BB, i++, Counters, false);
} else {
// Insert counter at the start of the block
IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
new file mode 100644
index 0000000..96ed4fa
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -0,0 +1,32 @@
+//===-- Instrumentation.cpp - TransformUtils Infrastructure ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// Instrumentation library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeInstrumentation - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeInstrumentation(PassRegistry &Registry) {
+ initializeEdgeProfilerPass(Registry);
+ initializeOptimalEdgeProfilerPass(Registry);
+ initializePathProfilerPass(Registry);
+}
+
+/// LLVMInitializeInstrumentation - C binding for
+/// initializeInstrumentation.
+void LLVMInitializeInstrumentation(LLVMPassRegistryRef R) {
+ initializeInstrumentation(*unwrap(R));
+}
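// Usage sketch (illustrative, not part of the patch): a C API client
// registers these passes once before building pass managers:
//
//   #include "llvm-c/Core.h"
//   #include "llvm-c/Initialization.h"
//
//   LLVMPassRegistryRef R = LLVMGetGlobalPassRegistry();
//   LLVMInitializeInstrumentation(R);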
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index 8eec987..c85a1a9 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -36,7 +36,9 @@ namespace {
bool runOnModule(Module &M);
public:
static char ID; // Pass identification, replacement for typeid
- OptimalEdgeProfiler() : ModulePass(ID) {}
+ OptimalEdgeProfiler() : ModulePass(ID) {
+ initializeOptimalEdgeProfilerPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(ProfileEstimatorPassID);
@@ -50,9 +52,14 @@ namespace {
}
char OptimalEdgeProfiler::ID = 0;
-INITIALIZE_PASS(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+INITIALIZE_PASS_BEGIN(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+ "Insert optimal instrumentation for edge profiling",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileEstimatorPass)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
"Insert optimal instrumentation for edge profiling",
- false, false);
+ false, false)
ModulePass *llvm::createOptimalEdgeProfilerPass() {
return new OptimalEdgeProfiler();
@@ -125,11 +132,11 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
// Calculate a Maximum Spanning Tree with the edge weights determined by
  // ProfileEstimator. ProfileEstimator also assigns weights to the virtual
  // edges (0,entry) and (BB,0) (for blocks with no successors) and these
- // edges also participate in the maximum spanning tree calculation.
+ // edges also participate in the maximum spanning tree calculation.
// The third parameter of MaximumSpanningTree() has the effect that not the
// actual MST is returned but the edges _not_ in the MST.
- ProfileInfo::EdgeWeights ECs =
+ ProfileInfo::EdgeWeights ECs =
getAnalysis<ProfileInfo>(*F).getEdgeWeights(F);
std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
MaximumSpanningTree<BasicBlock> MST (EdgeVector);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
new file mode 100644
index 0000000..6449b39
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -0,0 +1,1423 @@
+//===- PathProfiling.cpp - Inserts counters for path profiling ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments functions for Ball-Larus path profiling. Ball-Larus
+// profiling converts the CFG into a DAG by replacing backedges with edges
+// from entry to the start block and from the end block to exit. The paths
+// along the new DAG are enumerated, i.e. each path is given a path number.
+// Edges are instrumented to increment the path number register, such that the
+// path number register will equal the path number of the path taken at the
+// exit.
+//
+// This file defines classes for building a CFG for use with different stages
+// in the Ball-Larus path profiling instrumentation [Ball96]. The
+// requirements are formatting the llvm CFG into the Ball-Larus DAG, path
+// numbering, finding a spanning tree, moving increments from the spanning
+// tree to chords.
+//
+// Terms:
+// DAG - Directed Acyclic Graph.
+// Ball-Larus DAG - A CFG with an entry node, an exit node, and backedges
+// removed in the following manner. For every backedge
+// v->w, insert edge ENTRY->w and edge v->EXIT.
+// Path Number - The number corresponding to a specific path through a
+// Ball-Larus DAG.
+// Spanning Tree - A subgraph, S, is a spanning tree if S covers all
+// vertices and is a tree.
+// Chord - An edge not in the spanning tree.
+//
+// [Ball96]
+// T. Ball and J. R. Larus. "Efficient Path Profiling."
+// International Symposium on Microarchitecture, pages 46-57, 1996.
+// http://portal.acm.org/citation.cfm?id=243857
+//
+// [Ball94]
+// Thomas Ball. "Efficiently Counting Program Events with Support for
+// On-line queries."
+// ACM Transactions on Programming Languages and Systems, Vol 16, No 5,
+// September 1994, Pages 1399-1410.
+//===----------------------------------------------------------------------===//
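// A worked example (illustrative, not from the patch): for the DAG
//
//        ENTRY
//        /   \
//       A     B
//        \   /
//        EXIT
//
// NumPaths(EXIT) = 1, NumPaths(A) = NumPaths(B) = 1, NumPaths(ENTRY) = 2.
// Edge increments are chosen so each path sums to a unique number:
// val(ENTRY->A) = 0 and val(ENTRY->B) = NumPaths(A) = 1, so the path
// register holds 0 after ENTRY->A->EXIT and 1 after ENTRY->B->EXIT,
// indexing a two-entry counter array.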
+#define DEBUG_TYPE "insert-path-profiling"
+
+#include "llvm/DerivedTypes.h"
+#include "ProfilingUtils.h"
+#include "llvm/Analysis/PathNumbering.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include <map>
+#include <vector>
+
+#define HASH_THRESHHOLD 100000
+
+using namespace llvm;
+
+namespace {
+class BLInstrumentationNode;
+class BLInstrumentationEdge;
+class BLInstrumentationDag;
+
+// ---------------------------------------------------------------------------
+// BLInstrumentationNode extends BallLarusNode with members used by the
+// instrumentation algorithms.
+// ---------------------------------------------------------------------------
+class BLInstrumentationNode : public BallLarusNode {
+public:
+ // Creates a new BLInstrumentationNode from a BasicBlock.
+ BLInstrumentationNode(BasicBlock* BB);
+
+ // Get/sets the Value corresponding to the pathNumber register,
+ // constant or phinode. Used by the instrumentation code to remember
+ // path number Values.
+ Value* getStartingPathNumber();
+ void setStartingPathNumber(Value* pathNumber);
+
+ Value* getEndingPathNumber();
+ void setEndingPathNumber(Value* pathNumber);
+
+ // Get/set the PHINode Instruction for this node.
+ PHINode* getPathPHI();
+ void setPathPHI(PHINode* pathPHI);
+
+private:
+
+ Value* _startingPathNumber; // The Value for the current pathNumber.
+ Value* _endingPathNumber; // The Value for the current pathNumber.
+ PHINode* _pathPHI; // The PHINode for current pathNumber.
+};
+
+// --------------------------------------------------------------------------
+// BLInstrumentationEdge extends BallLarusEdge with data about the
+// instrumentation that will end up on each edge.
+// --------------------------------------------------------------------------
+class BLInstrumentationEdge : public BallLarusEdge {
+public:
+ BLInstrumentationEdge(BLInstrumentationNode* source,
+ BLInstrumentationNode* target);
+
+ // Sets the target node of this edge. Required to split edges.
+ void setTarget(BallLarusNode* node);
+
+ // Get/set whether edge is in the spanning tree.
+ bool isInSpanningTree() const;
+ void setIsInSpanningTree(bool isInSpanningTree);
+
+  // Get/set whether this edge will be instrumented with a path number
+ // initialization.
+ bool isInitialization() const;
+ void setIsInitialization(bool isInitialization);
+
+ // Get/set whether this edge will be instrumented with a path counter
+ // increment. Notice this is incrementing the path counter
+ // corresponding to the path number register. The path number
+ // increment is determined by getIncrement().
+ bool isCounterIncrement() const;
+ void setIsCounterIncrement(bool isCounterIncrement);
+
+ // Get/set the path number increment that this edge will be instrumented
+ // with. This is distinct from the path counter increment and the
+ // weight. The counter increment counts the number of executions of
+ // some path, whereas the path number keeps track of which path number
+ // the program is on.
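+  // (E.g. an edge may perform 'r += 3' on the path number register r,
+  // while only a counter-increment edge also performs 'count[r]++'.)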
+ long getIncrement() const;
+ void setIncrement(long increment);
+
+ // Get/set whether the edge has been instrumented.
+ bool hasInstrumentation();
+ void setHasInstrumentation(bool hasInstrumentation);
+
+ // Returns the successor number of this edge in the source.
+ unsigned getSuccessorNumber();
+
+private:
+ // The increment that the code will be instrumented with.
+ long long _increment;
+
+ // Whether this edge is in the spanning tree.
+ bool _isInSpanningTree;
+
+  // Whether this edge is an initialization of the path number.
+ bool _isInitialization;
+
+ // Whether this edge is a path counter increment.
+ bool _isCounterIncrement;
+
+ // Whether this edge has been instrumented.
+ bool _hasInstrumentation;
+};
+
+// ---------------------------------------------------------------------------
+// BLInstrumentationDag extends BallLarusDag with algorithms that
+// determine where instrumentation should be placed.
+// ---------------------------------------------------------------------------
+class BLInstrumentationDag : public BallLarusDag {
+public:
+ BLInstrumentationDag(Function &F);
+
+ // Returns the Exit->Root edge. This edge is required for creating
+ // directed cycles in the algorithm for moving instrumentation off of
+  // the spanning tree.
+ BallLarusEdge* getExitRootEdge();
+
+ // Returns an array of phony edges which mark those nodes
+  // with function calls.
+ BLEdgeVector getCallPhonyEdges();
+
+ // Gets/sets the path counter array
+ GlobalVariable* getCounterArray();
+ void setCounterArray(GlobalVariable* c);
+
+ // Calculates the increments for the chords, thereby removing
+ // instrumentation from the spanning tree edges. Implementation is based
+  // on the algorithm in Figure 4 of [Ball94].
+ void calculateChordIncrements();
+
+ // Updates the state when an edge has been split
+ void splitUpdate(BLInstrumentationEdge* formerEdge, BasicBlock* newBlock);
+
+ // Calculates a spanning tree of the DAG ignoring cycles. Whichever
+ // edges are in the spanning tree will not be instrumented, but this
+ // implementation does not try to minimize the instrumentation overhead
+ // by trying to find hot edges.
+ void calculateSpanningTree();
+
+ // Pushes initialization further down in order to group the first
+ // increment and initialization.
+ void pushInitialization();
+
+ // Pushes the path counter increments up in order to group the last path
+ // number increment.
+ void pushCounters();
+
+ // Removes phony edges from the successor list of the source, and the
+ // predecessor list of the target.
+ void unlinkPhony();
+
+ // Generate dot graph for the function
+ void generateDotGraph();
+
+protected:
+ // BLInstrumentationDag creates BLInstrumentationNode objects in this
+ // method overriding the creation of BallLarusNode objects.
+ //
+ // Allows subclasses to determine which type of Node is created.
+ // Override this method to produce subclasses of BallLarusNode if
+ // necessary.
+ virtual BallLarusNode* createNode(BasicBlock* BB);
+
+  // BLInstrumentationDag creates BLInstrumentationEdges.
+ //
+ // Allows subclasses to determine which type of Edge is created.
+ // Override this method to produce subclasses of BallLarusEdge if
+ // necessary. Parameters source and target will have been created by
+ // createNode and can be cast to the subclass of BallLarusNode*
+ // returned by createNode.
+ virtual BallLarusEdge* createEdge(
+ BallLarusNode* source, BallLarusNode* target, unsigned edgeNumber);
+
+private:
+ BLEdgeVector _treeEdges; // All edges in the spanning tree.
+ BLEdgeVector _chordEdges; // All edges not in the spanning tree.
+ GlobalVariable* _counterArray; // Array to store path counters
+
+ // Removes the edge from the appropriate predecessor and successor lists.
+ void unlinkEdge(BallLarusEdge* edge);
+
+ // Makes an edge part of the spanning tree.
+ void makeEdgeSpanning(BLInstrumentationEdge* edge);
+
+ // Pushes initialization and calls itself recursively.
+ void pushInitializationFromEdge(BLInstrumentationEdge* edge);
+
+ // Pushes path counter increments up recursively.
+ void pushCountersFromEdge(BLInstrumentationEdge* edge);
+
+  // Depth first algorithm for determining the chord increments.
+ void calculateChordIncrementsDfs(
+ long weight, BallLarusNode* v, BallLarusEdge* e);
+
+ // Determines the relative direction of two edges.
+ int calculateChordIncrementsDir(BallLarusEdge* e, BallLarusEdge* f);
+};
+
+// ---------------------------------------------------------------------------
+// PathProfiler is a module pass which inserts path profiling instrumentation
+// ---------------------------------------------------------------------------
+class PathProfiler : public ModulePass {
+private:
+  // Current context for multithreading support.
+ LLVMContext* Context;
+
+  // Which function are we currently instrumenting?
+ unsigned currentFunctionNumber;
+
+ // The function prototype in the profiling runtime for incrementing a
+ // single path counter in a hash table.
+ Constant* llvmIncrementHashFunction;
+ Constant* llvmDecrementHashFunction;
+
+ // Instruments each function with path profiling. 'main' is instrumented
+ // with code to save the profile to disk.
+ bool runOnModule(Module &M);
+
+ // Analyzes the function for Ball-Larus path profiling, and inserts code.
+ void runOnFunction(std::vector<Constant*> &ftInit, Function &F, Module &M);
+
+ // Creates an increment constant representing incr.
+ ConstantInt* createIncrementConstant(long incr, int bitsize);
+
+ // Creates an increment constant representing the value in
+ // edge->getIncrement().
+ ConstantInt* createIncrementConstant(BLInstrumentationEdge* edge);
+
+ // Finds the insertion point after pathNumber in block. PathNumber may
+ // be NULL.
+ BasicBlock::iterator getInsertionPoint(
+ BasicBlock* block, Value* pathNumber);
+
+ // Inserts source's pathNumber Value* into target. Target may or may not
+ // have multiple predecessors, and may or may not have its phiNode
+  // initialized.
+ void pushValueIntoNode(
+ BLInstrumentationNode* source, BLInstrumentationNode* target);
+
+ // Inserts source's pathNumber Value* into the appropriate slot of
+ // target's phiNode.
+ void pushValueIntoPHI(
+ BLInstrumentationNode* target, BLInstrumentationNode* source);
+
+  // The Value* in node, oldVal, is updated with a Value* corresponding
+  // to oldVal + addition.
+ void insertNumberIncrement(BLInstrumentationNode* node, Value* addition,
+ bool atBeginning);
+
+ // Creates a counter increment in the given node. The Value* in node is
+ // taken as the index into a hash table.
+ void insertCounterIncrement(
+ Value* incValue,
+ BasicBlock::iterator insertPoint,
+ BLInstrumentationDag* dag,
+ bool increment = true);
+
+ // A PHINode is created in the node, and its values initialized to -1U.
+ void preparePHI(BLInstrumentationNode* node);
+
+ // Inserts instrumentation for the given edge
+ //
+  // Pre: The edge's source node has pathNumber set if edge is a nonzero
+  // path number increment.
+ //
+ // Post: Edge's target node has a pathNumber set to the path number Value
+ // corresponding to the value of the path register after edge's
+ // execution.
+ void insertInstrumentationStartingAt(
+ BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag);
+
+ // If this edge is a critical edge, then inserts a node at this edge.
+ // This edge becomes the first edge, and a new BallLarusEdge is created.
+ bool splitCritical(BLInstrumentationEdge* edge, BLInstrumentationDag* dag);
+
+ // Inserts instrumentation according to the marked edges in dag. Phony
+ // edges must be unlinked from the DAG, but accessible from the
+ // backedges. Dag must have initializations, path number increments, and
+ // counter increments present.
+ //
+ // Counter storage is created here.
+  void insertInstrumentation(BLInstrumentationDag& dag, Module &M);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ PathProfiler() : ModulePass(ID) {
+ initializePathProfilerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char *getPassName() const {
+ return "Path Profiler";
+ }
+};
+} // end anonymous namespace
+
+// Should we print the dot-graphs?
+static cl::opt<bool> DotPathDag("path-profile-pathdag", cl::Hidden,
+ cl::desc("Output the path profiling DAG for each function."));
+
+// Register the path profiler as a pass
+char PathProfiler::ID = 0;
+INITIALIZE_PASS(PathProfiler, "insert-path-profiling",
+ "Insert instrumentation for Ball-Larus path profiling",
+ false, false)
+
+ModulePass *llvm::createPathProfilerPass() { return new PathProfiler(); }
+
+namespace llvm {
+ class PathProfilingFunctionTable {};
+
+ // Type for global array storing references to hashes or arrays
+ template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable,
+ xcompile> {
+ public:
+ static const StructType *get(LLVMContext& C) {
+ return( StructType::get(
+ C, TypeBuilder<types::i<32>, xcompile>::get(C), // type
+ TypeBuilder<types::i<32>, xcompile>::get(C), // array size
+ TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr
+ NULL));
+ }
+ };
+
+ typedef TypeBuilder<PathProfilingFunctionTable, true>
+ ftEntryTypeBuilder;
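+  // Each table entry is thus roughly { i32 type; i32 numPaths; i8* counters }
+  // (see runOnFunction, where the entries are built).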
+
+  // BLInstrumentationEdge << operator overloading
+ raw_ostream& operator<<(raw_ostream& os,
+ const BLInstrumentationEdge& edge) {
+ os << "[" << edge.getSource()->getName() << " -> "
+ << edge.getTarget()->getName() << "] init: "
+ << (edge.isInitialization() ? "yes" : "no")
+ << " incr:" << edge.getIncrement() << " cinc: "
+ << (edge.isCounterIncrement() ? "yes" : "no");
+ return(os);
+ }
+}
+
+// Creates a new BLInstrumentationNode from a BasicBlock.
+BLInstrumentationNode::BLInstrumentationNode(BasicBlock* BB) :
+ BallLarusNode(BB),
+ _startingPathNumber(NULL), _endingPathNumber(NULL), _pathPHI(NULL) {}
+
+// Constructor for BLInstrumentationEdge.
+BLInstrumentationEdge::BLInstrumentationEdge(BLInstrumentationNode* source,
+ BLInstrumentationNode* target)
+ : BallLarusEdge(source, target, 0),
+ _increment(0), _isInSpanningTree(false), _isInitialization(false),
+ _isCounterIncrement(false), _hasInstrumentation(false) {}
+
+// Sets the target node of this edge. Required to split edges.
+void BLInstrumentationEdge::setTarget(BallLarusNode* node) {
+ _target = node;
+}
+
+// Returns whether this edge is in the spanning tree.
+bool BLInstrumentationEdge::isInSpanningTree() const {
+ return(_isInSpanningTree);
+}
+
+// Sets whether this edge is in the spanning tree.
+void BLInstrumentationEdge::setIsInSpanningTree(bool isInSpanningTree) {
+ _isInSpanningTree = isInSpanningTree;
+}
+
+// Returns whether this edge will be instrumented with a path number
+// initialization.
+bool BLInstrumentationEdge::isInitialization() const {
+ return(_isInitialization);
+}
+
+// Sets whether this edge will be instrumented with a path number
+// initialization.
+void BLInstrumentationEdge::setIsInitialization(bool isInitialization) {
+ _isInitialization = isInitialization;
+}
+
+// Returns whether this edge will be instrumented with a path counter
+// increment. Notice this is incrementing the path counter
+// corresponding to the path number register. The path number
+// increment is determined by getIncrement().
+bool BLInstrumentationEdge::isCounterIncrement() const {
+ return(_isCounterIncrement);
+}
+
+// Sets whether this edge will be instrumented with a path counter
+// increment.
+void BLInstrumentationEdge::setIsCounterIncrement(bool isCounterIncrement) {
+ _isCounterIncrement = isCounterIncrement;
+}
+
+// Gets the path number increment that this edge will be instrumented
+// with. This is distinct from the path counter increment and the
+// weight. The counter increment counts the number of executions of
+// some path, whereas the path number keeps track of which path number
+// the program is on.
+long BLInstrumentationEdge::getIncrement() const {
+ return(_increment);
+}
+
+// Sets the path number increment that this edge will be instrumented
+// with.
+void BLInstrumentationEdge::setIncrement(long increment) {
+ _increment = increment;
+}
+
+// True iff the edge has already been instrumented.
+bool BLInstrumentationEdge::hasInstrumentation() {
+ return(_hasInstrumentation);
+}
+
+// Set whether this edge has been instrumented.
+void BLInstrumentationEdge::setHasInstrumentation(bool hasInstrumentation) {
+ _hasInstrumentation = hasInstrumentation;
+}
+
+// Returns the successor number of this edge in the source.
+unsigned BLInstrumentationEdge::getSuccessorNumber() {
+ BallLarusNode* sourceNode = getSource();
+ BallLarusNode* targetNode = getTarget();
+ BasicBlock* source = sourceNode->getBlock();
+ BasicBlock* target = targetNode->getBlock();
+
+ if(source == NULL || target == NULL)
+ return(0);
+
+ TerminatorInst* terminator = source->getTerminator();
+
+ unsigned i;
+ for(i=0; i < terminator->getNumSuccessors(); i++) {
+ if(terminator->getSuccessor(i) == target)
+ break;
+ }
+
+ return(i);
+}
+
+// BLInstrumentationDag constructor initializes a DAG for the given Function.
+BLInstrumentationDag::BLInstrumentationDag(Function &F) : BallLarusDag(F),
+ _counterArray(0) {
+}
+
+// Returns the Exit->Root edge. This edge is required for creating
+// directed cycles in the algorithm for moving instrumentation off of
+// the spanning tree.
+BallLarusEdge* BLInstrumentationDag::getExitRootEdge() {
+ BLEdgeIterator erEdge = getExit()->succBegin();
+ return(*erEdge);
+}
+
+BLEdgeVector BLInstrumentationDag::getCallPhonyEdges () {
+ BLEdgeVector callEdges;
+
+ for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++ ) {
+ if( (*edge)->getType() == BallLarusEdge::CALLEDGE_PHONY )
+ callEdges.push_back(*edge);
+ }
+
+ return callEdges;
+}
+
+// Gets the path counter array
+GlobalVariable* BLInstrumentationDag::getCounterArray() {
+ return _counterArray;
+}
+
+void BLInstrumentationDag::setCounterArray(GlobalVariable* c) {
+ _counterArray = c;
+}
+
+// Calculates the increments for the chords, thereby removing
+// instrumentation from the spanning tree edges. Implementation is based on
+// the algorithm in Figure 4 of [Ball94].
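+//
+// Sketch of the idea: each chord, together with the unique spanning tree
+// path between its endpoints, forms a cycle; the chord's increment ends
+// up as its own weight plus the signed sum of the tree-edge weights
+// around that cycle, so only chord edges need run-time updates.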
+void BLInstrumentationDag::calculateChordIncrements() {
+ calculateChordIncrementsDfs(0, getRoot(), NULL);
+
+ BLInstrumentationEdge* chord;
+ for(BLEdgeIterator chordEdge = _chordEdges.begin(),
+ end = _chordEdges.end(); chordEdge != end; chordEdge++) {
+ chord = (BLInstrumentationEdge*) *chordEdge;
+ chord->setIncrement(chord->getIncrement() + chord->getWeight());
+ }
+}
+
+// Updates the state when an edge has been split
+void BLInstrumentationDag::splitUpdate(BLInstrumentationEdge* formerEdge,
+ BasicBlock* newBlock) {
+ BallLarusNode* oldTarget = formerEdge->getTarget();
+ BallLarusNode* newNode = addNode(newBlock);
+ formerEdge->setTarget(newNode);
+ newNode->addPredEdge(formerEdge);
+
+ DEBUG(dbgs() << " Edge split: " << *formerEdge << "\n");
+
+ oldTarget->removePredEdge(formerEdge);
+ BallLarusEdge* newEdge = addEdge(newNode, oldTarget,0);
+
+ if( formerEdge->getType() == BallLarusEdge::BACKEDGE ||
+ formerEdge->getType() == BallLarusEdge::SPLITEDGE) {
+ newEdge->setType(formerEdge->getType());
+ newEdge->setPhonyRoot(formerEdge->getPhonyRoot());
+ newEdge->setPhonyExit(formerEdge->getPhonyExit());
+ formerEdge->setType(BallLarusEdge::NORMAL);
+ formerEdge->setPhonyRoot(NULL);
+ formerEdge->setPhonyExit(NULL);
+ }
+}
+
+// Calculates a spanning tree of the DAG ignoring cycles. Whichever
+// edges are in the spanning tree will not be instrumented, but this
+// implementation does not try to minimize the instrumentation overhead
+// by trying to find hot edges.
+void BLInstrumentationDag::calculateSpanningTree() {
+ std::stack<BallLarusNode*> dfsStack;
+
+ for(BLNodeIterator nodeIt = _nodes.begin(), end = _nodes.end();
+ nodeIt != end; nodeIt++) {
+ (*nodeIt)->setColor(BallLarusNode::WHITE);
+ }
+
+ dfsStack.push(getRoot());
+ while(dfsStack.size() > 0) {
+ BallLarusNode* node = dfsStack.top();
+ dfsStack.pop();
+
+ if(node->getColor() == BallLarusNode::WHITE)
+ continue;
+
+ BallLarusNode* nextNode;
+ bool forward = true;
+ BLEdgeIterator succEnd = node->succEnd();
+
+ node->setColor(BallLarusNode::WHITE);
+ // first iterate over successors then predecessors
+ for(BLEdgeIterator edge = node->succBegin(), predEnd = node->predEnd();
+ edge != predEnd; edge++) {
+ if(edge == succEnd) {
+ edge = node->predBegin();
+ forward = false;
+ }
+
+ // Ignore split edges
+ if ((*edge)->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ nextNode = forward? (*edge)->getTarget(): (*edge)->getSource();
+ if(nextNode->getColor() != BallLarusNode::WHITE) {
+ nextNode->setColor(BallLarusNode::WHITE);
+ makeEdgeSpanning((BLInstrumentationEdge*)(*edge));
+ }
+ }
+ }
+
+ for(BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++) {
+ BLInstrumentationEdge* instEdge = (BLInstrumentationEdge*) (*edge);
+    // safe since createEdge is overridden
+ if(!instEdge->isInSpanningTree() && (*edge)->getType()
+ != BallLarusEdge::SPLITEDGE)
+ _chordEdges.push_back(instEdge);
+ }
+}
+
+// Pushes initialization further down in order to group the first
+// increment and initialization.
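+//
+// E.g. (illustrative) an initialization 'r = 0' on the exit->root edge
+// followed by a single increment 'r += k' on the unique successor edge
+// can be fused into one initialization 'r = k' on that edge.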
+void BLInstrumentationDag::pushInitialization() {
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) getExitRootEdge();
+ exitRootEdge->setIsInitialization(true);
+ pushInitializationFromEdge(exitRootEdge);
+}
+
+// Pushes the path counter increments up in order to group the last path
+// number increment.
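+//
+// E.g. (illustrative) the final 'r += k' and the 'count[r]++' at the
+// exit are moved onto the same edge, effectively becoming a single
+// 'count[r + k]++'.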
+void BLInstrumentationDag::pushCounters() {
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) getExitRootEdge();
+ exitRootEdge->setIsCounterIncrement(true);
+ pushCountersFromEdge(exitRootEdge);
+}
+
+// Removes phony edges from the successor list of the source, and the
+// predecessor list of the target.
+void BLInstrumentationDag::unlinkPhony() {
+ BallLarusEdge* edge;
+
+ for(BLEdgeIterator next = _edges.begin(),
+ end = _edges.end(); next != end; next++) {
+ edge = (*next);
+
+ if( edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::SPLITEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::CALLEDGE_PHONY ) {
+ unlinkEdge(edge);
+ }
+ }
+}
+
+// Generate a .dot graph to represent the DAG and pathNumbers
+void BLInstrumentationDag::generateDotGraph() {
+ std::string errorInfo;
+ std::string functionName = getFunction().getNameStr();
+ std::string filename = "pathdag." + functionName + ".dot";
+
+ DEBUG (dbgs() << "Writing '" << filename << "'...\n");
+ raw_fd_ostream dotFile(filename.c_str(), errorInfo);
+
+ if (!errorInfo.empty()) {
+ errs() << "Error opening '" << filename.c_str() <<"' for writing!";
+ errs() << "\n";
+ return;
+ }
+
+ dotFile << "digraph " << functionName << " {\n";
+
+ for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++) {
+ std::string sourceName = (*edge)->getSource()->getName();
+ std::string targetName = (*edge)->getTarget()->getName();
+
+ dotFile << "\t\"" << sourceName.c_str() << "\" -> \""
+ << targetName.c_str() << "\" ";
+
+ long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
+
+ switch( (*edge)->getType() ) {
+ case BallLarusEdge::NORMAL:
+ dotFile << "[label=" << inc << "] [color=black];\n";
+ break;
+
+ case BallLarusEdge::BACKEDGE:
+ dotFile << "[color=cyan];\n";
+ break;
+
+ case BallLarusEdge::BACKEDGE_PHONY:
+ dotFile << "[label=" << inc
+ << "] [color=blue];\n";
+ break;
+
+ case BallLarusEdge::SPLITEDGE:
+ dotFile << "[color=violet];\n";
+ break;
+
+ case BallLarusEdge::SPLITEDGE_PHONY:
+ dotFile << "[label=" << inc << "] [color=red];\n";
+ break;
+
+ case BallLarusEdge::CALLEDGE_PHONY:
+ dotFile << "[label=" << inc << "] [color=green];\n";
+ break;
+ }
+ }
+
+ dotFile << "}\n";
+}
+
+// Allows subclasses to determine which type of Node is created.
+// Override this method to produce subclasses of BallLarusNode if
+// necessary. The destructor of BallLarusDag will call free on each pointer
+// created.
+BallLarusNode* BLInstrumentationDag::createNode(BasicBlock* BB) {
+ return( new BLInstrumentationNode(BB) );
+}
+
+// Allows subclasses to determine which type of Edge is created.
+// Override this method to produce subclasses of BallLarusEdge if
+// necessary. The destructor of BallLarusDag will call free on each pointer
+// created.
+BallLarusEdge* BLInstrumentationDag::createEdge(BallLarusNode* source,
+ BallLarusNode* target, unsigned edgeNumber) {
+ // One can cast from BallLarusNode to BLInstrumentationNode since createNode
+  // is overridden to produce BLInstrumentationNode.
+ return( new BLInstrumentationEdge((BLInstrumentationNode*)source,
+ (BLInstrumentationNode*)target) );
+}
+
+// Gets the Value corresponding to the pathNumber register, constant,
+// or phinode. Used by the instrumentation code to remember path
+// number Values.
+Value* BLInstrumentationNode::getStartingPathNumber(){
+ return(_startingPathNumber);
+}
+
+// Sets the Value of the pathNumber. Used by the instrumentation code.
+void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) {
+ DEBUG(dbgs() << " SPN-" << getName() << " <-- " << (pathNumber ?
+ pathNumber->getNameStr() : "unused") << "\n");
+ _startingPathNumber = pathNumber;
+}
+
+Value* BLInstrumentationNode::getEndingPathNumber(){
+ return(_endingPathNumber);
+}
+
+void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) {
+ DEBUG(dbgs() << " EPN-" << getName() << " <-- "
+ << (pathNumber ? pathNumber->getNameStr() : "unused") << "\n");
+ _endingPathNumber = pathNumber;
+}
+
+// Get the PHINode Instruction for this node. Used by instrumentation
+// code.
+PHINode* BLInstrumentationNode::getPathPHI() {
+ return(_pathPHI);
+}
+
+// Set the PHINode Instruction for this node. Used by instrumentation
+// code.
+void BLInstrumentationNode::setPathPHI(PHINode* pathPHI) {
+ _pathPHI = pathPHI;
+}
+
+// Removes the edge from the appropriate predecessor and successor
+// lists.
+void BLInstrumentationDag::unlinkEdge(BallLarusEdge* edge) {
+ if(edge == getExitRootEdge())
+ DEBUG(dbgs() << " Removing exit->root edge\n");
+
+ edge->getSource()->removeSuccEdge(edge);
+ edge->getTarget()->removePredEdge(edge);
+}
+
+// Makes an edge part of the spanning tree.
+void BLInstrumentationDag::makeEdgeSpanning(BLInstrumentationEdge* edge) {
+ edge->setIsInSpanningTree(true);
+ _treeEdges.push_back(edge);
+}
+
+// Pushes initialization and calls itself recursively.
+void BLInstrumentationDag::pushInitializationFromEdge(
+ BLInstrumentationEdge* edge) {
+ BallLarusNode* target;
+
+ target = edge->getTarget();
+ if( target->getNumberPredEdges() > 1 || target == getExit() ) {
+ return;
+ } else {
+ for(BLEdgeIterator next = target->succBegin(),
+ end = target->succEnd(); next != end; next++) {
+ BLInstrumentationEdge* intoEdge = (BLInstrumentationEdge*) *next;
+
+ // Skip split edges
+ if (intoEdge->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ intoEdge->setIncrement(intoEdge->getIncrement() +
+ edge->getIncrement());
+ intoEdge->setIsInitialization(true);
+ pushInitializationFromEdge(intoEdge);
+ }
+
+ edge->setIncrement(0);
+ edge->setIsInitialization(false);
+ }
+}
+
+// Pushes path counter increments up recursively.
+void BLInstrumentationDag::pushCountersFromEdge(BLInstrumentationEdge* edge) {
+ BallLarusNode* source;
+
+ source = edge->getSource();
+ if(source->getNumberSuccEdges() > 1 || source == getRoot()
+ || edge->isInitialization()) {
+ return;
+ } else {
+ for(BLEdgeIterator previous = source->predBegin(),
+ end = source->predEnd(); previous != end; previous++) {
+ BLInstrumentationEdge* fromEdge = (BLInstrumentationEdge*) *previous;
+
+ // Skip split edges
+ if (fromEdge->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ fromEdge->setIncrement(fromEdge->getIncrement() +
+ edge->getIncrement());
+ fromEdge->setIsCounterIncrement(true);
+ pushCountersFromEdge(fromEdge);
+ }
+
+ edge->setIncrement(0);
+ edge->setIsCounterIncrement(false);
+ }
+}
+
+// Depth first algorithm for determining the chord increments.
+void BLInstrumentationDag::calculateChordIncrementsDfs(long weight,
+ BallLarusNode* v, BallLarusEdge* e) {
+ BLInstrumentationEdge* f;
+
+ for(BLEdgeIterator treeEdge = _treeEdges.begin(),
+ end = _treeEdges.end(); treeEdge != end; treeEdge++) {
+ f = (BLInstrumentationEdge*) *treeEdge;
+ if(e != f && v == f->getTarget()) {
+ calculateChordIncrementsDfs(
+ calculateChordIncrementsDir(e,f)*(weight) +
+ f->getWeight(), f->getSource(), f);
+ }
+ if(e != f && v == f->getSource()) {
+ calculateChordIncrementsDfs(
+ calculateChordIncrementsDir(e,f)*(weight) +
+ f->getWeight(), f->getTarget(), f);
+ }
+ }
+
+ for(BLEdgeIterator chordEdge = _chordEdges.begin(),
+ end = _chordEdges.end(); chordEdge != end; chordEdge++) {
+ f = (BLInstrumentationEdge*) *chordEdge;
+ if(v == f->getSource() || v == f->getTarget()) {
+ f->setIncrement(f->getIncrement() +
+ calculateChordIncrementsDir(e,f)*weight);
+ }
+ }
+}
+
+// Determines the relative direction of two edges.
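+// Returns +1 when f is traversed in the same direction as e around their
+// common cycle (the edges meet head-to-tail), and -1 when it is traversed
+// against it; the initial call passes e == NULL, which counts as +1.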
+int BLInstrumentationDag::calculateChordIncrementsDir(BallLarusEdge* e,
+ BallLarusEdge* f) {
+ if( e == NULL)
+ return(1);
+ else if(e->getSource() == f->getTarget()
+ || e->getTarget() == f->getSource())
+ return(1);
+
+ return(-1);
+}
+
+// Creates an increment constant representing incr.
+ConstantInt* PathProfiler::createIncrementConstant(long incr,
+ int bitsize) {
+  return(ConstantInt::get(IntegerType::get(*Context, bitsize), incr));
+}
+
+// Creates an increment constant representing the value in
+// edge->getIncrement().
+ConstantInt* PathProfiler::createIncrementConstant(
+ BLInstrumentationEdge* edge) {
+ return(createIncrementConstant(edge->getIncrement(), 32));
+}
+
+// Finds the insertion point after pathNumber in block. PathNumber may
+// be NULL.
+BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
+ pathNumber) {
+ if(pathNumber == NULL || isa<ConstantInt>(pathNumber)
+ || (((Instruction*)(pathNumber))->getParent()) != block) {
+ return(block->getFirstNonPHI());
+ } else {
+ Instruction* pathNumberInst = (Instruction*) (pathNumber);
+ BasicBlock::iterator insertPoint;
+ BasicBlock::iterator end = block->end();
+
+ for(insertPoint = block->begin();
+ insertPoint != end; insertPoint++) {
+ Instruction* insertInst = &(*insertPoint);
+
+ if(insertInst == pathNumberInst)
+ return(++insertPoint);
+ }
+
+ return(insertPoint);
+ }
+}
+
+// A PHINode is created in the node, and its values initialized to -1U.
+void PathProfiler::preparePHI(BLInstrumentationNode* node) {
+ BasicBlock* block = node->getBlock();
+ BasicBlock::iterator insertPoint = block->getFirstNonPHI();
+ PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), "pathNumber",
+ insertPoint );
+ node->setPathPHI(phi);
+ node->setStartingPathNumber(phi);
+ node->setEndingPathNumber(phi);
+
+ for(pred_iterator predIt = pred_begin(node->getBlock()),
+ end = pred_end(node->getBlock()); predIt != end; predIt++) {
+ BasicBlock* pred = (*predIt);
+
+ if(pred != NULL)
+ phi->addIncoming(createIncrementConstant((long)-1, 32), pred);
+ }
+}
+
+// Inserts source's pathNumber Value* into target. Target may or may not
+// have multiple predecessors, and may or may not have its phiNode
+// initialized.
+void PathProfiler::pushValueIntoNode(BLInstrumentationNode* source,
+ BLInstrumentationNode* target) {
+ if(target->getBlock() == NULL)
+ return;
+
+ if(target->getNumberPredEdges() <= 1) {
+ assert(target->getStartingPathNumber() == NULL &&
+ "Target already has path number");
+ target->setStartingPathNumber(source->getEndingPathNumber());
+ target->setEndingPathNumber(source->getEndingPathNumber());
+ DEBUG(dbgs() << " Passing path number"
+ << (source->getEndingPathNumber() ? "" : " (null)")
+ << " value through.\n");
+ } else {
+ if(target->getPathPHI() == NULL) {
+ DEBUG(dbgs() << " Initializing PHI node for block '"
+ << target->getName() << "'\n");
+ preparePHI(target);
+ }
+ pushValueIntoPHI(target, source);
+ DEBUG(dbgs() << " Passing number value into PHI for block '"
+ << target->getName() << "'\n");
+ }
+}
+
+// Inserts source's pathNumber Value* into the appropriate slot of
+// target's phiNode.
+void PathProfiler::pushValueIntoPHI(BLInstrumentationNode* target,
+ BLInstrumentationNode* source) {
+ PHINode* phi = target->getPathPHI();
+ assert(phi != NULL && " Tried to push value into node with PHI, but node"
+ " actually had no PHI.");
+ phi->removeIncomingValue(source->getBlock(), false);
+ phi->addIncoming(source->getEndingPathNumber(), source->getBlock());
+}
+
+// The Value* in node, oldVal, is updated with a Value* corresponding to
+// oldVal + addition.
+void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node,
+ Value* addition, bool atBeginning) {
+ BasicBlock* block = node->getBlock();
+ assert(node->getStartingPathNumber() != NULL);
+ assert(node->getEndingPathNumber() != NULL);
+
+ BasicBlock::iterator insertPoint;
+
+ if( atBeginning )
+ insertPoint = block->getFirstNonPHI();
+ else
+ insertPoint = block->getTerminator();
+
+ DEBUG(errs() << " Creating addition instruction.\n");
+ Value* newpn = BinaryOperator::Create(Instruction::Add,
+ node->getStartingPathNumber(),
+ addition, "pathNumber", insertPoint);
+
+ node->setEndingPathNumber(newpn);
+
+ if( atBeginning )
+ node->setStartingPathNumber(newpn);
+}
+
+// Creates a counter increment in the given node. The Value* in node is
+// taken as the index into an array or hash table. The hash table access
+// is a call to the runtime.
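+//
+// For the array case the emitted IR is roughly (illustrative; the counter
+// global is actually unnamed):
+//   %pc    = getelementptr [N x i32]* @counters, i32 0, i32 %pathNumber
+//   %oldPC = load i32* %pc
+//   %isMax = icmp ult i32 %oldPC, 4294967295
+//   %inc   = select i1 %isMax, i32 1, i32 0
+//   %newPC = add i32 %oldPC, %inc
+//   store i32 %newPC, i32* %pc
+// The hash case is a single call into the profiling runtime instead.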
+void PathProfiler::insertCounterIncrement(Value* incValue,
+ BasicBlock::iterator insertPoint,
+ BLInstrumentationDag* dag,
+ bool increment) {
+ // Counter increment for array
+ if( dag->getNumberOfPaths() <= HASH_THRESHHOLD ) {
+ // Get pointer to the array location
+ std::vector<Value*> gepIndices(2);
+ gepIndices[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
+ gepIndices[1] = incValue;
+
+ GetElementPtrInst* pcPointer =
+ GetElementPtrInst::Create(dag->getCounterArray(),
+ gepIndices.begin(), gepIndices.end(),
+ "counterInc", insertPoint);
+
+ // Load from the array - call it oldPC
+ LoadInst* oldPc = new LoadInst(pcPointer, "oldPC", insertPoint);
+
+ // Test to see whether adding 1 will overflow the counter
+ ICmpInst* isMax = new ICmpInst(insertPoint, CmpInst::ICMP_ULT, oldPc,
+ createIncrementConstant(0xffffffff, 32),
+ "isMax");
+
+ // Select increment for the path counter based on overflow
+ SelectInst* inc =
+ SelectInst::Create( isMax, createIncrementConstant(increment?1:-1,32),
+ createIncrementConstant(0,32),
+ "pathInc", insertPoint);
+
+ // newPc = oldPc + inc
+ BinaryOperator* newPc = BinaryOperator::Create(Instruction::Add,
+ oldPc, inc, "newPC",
+ insertPoint);
+
+ // Store back in to the array
+ new StoreInst(newPc, pcPointer, insertPoint);
+ } else { // Counter increment for hash
+ std::vector<Value*> args(2);
+ args[0] = ConstantInt::get(Type::getInt32Ty(*Context),
+ currentFunctionNumber);
+ args[1] = incValue;
+
+ CallInst::Create(
+ increment ? llvmIncrementHashFunction : llvmDecrementHashFunction,
+ args.begin(), args.end(), "", insertPoint);
+ }
+}
+
+// Inserts instrumentation for the given edge
+//
+// Pre: The edge's source node has pathNumber set if edge is a nonzero
+// path number increment.
+//
+// Post: Edge's target node has a pathNumber set to the path number Value
+// corresponding to the value of the path register after edge's
+// execution.
+//
+// FIXME: This should be reworked so it's not recursive.
+void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag) {
+ // Mark the edge as instrumented
+ edge->setHasInstrumentation(true);
+ DEBUG(dbgs() << "\nInstrumenting edge: " << (*edge) << "\n");
+
+ // create a new node for this edge's instrumentation
+ splitCritical(edge, dag);
+
+ BLInstrumentationNode* sourceNode = (BLInstrumentationNode*)edge->getSource();
+ BLInstrumentationNode* targetNode = (BLInstrumentationNode*)edge->getTarget();
+ BLInstrumentationNode* instrumentNode;
+ BLInstrumentationNode* nextSourceNode;
+
+ bool atBeginning = false;
+
+ // Source node has only 1 successor so any information can be simply
+ // inserted in to it without splitting
+ if( sourceNode->getBlock() && sourceNode->getNumberSuccEdges() <= 1) {
+ DEBUG(dbgs() << " Potential instructions to be placed in: "
+ << sourceNode->getName() << " (at end)\n");
+ instrumentNode = sourceNode;
+ nextSourceNode = targetNode; // ... since we never made any new nodes
+ }
+
+ // The target node only has one predecessor, so we can safely insert edge
+ // instrumentation into it. If there was splitting, it must have been
+ // successful.
+ else if( targetNode->getNumberPredEdges() == 1 ) {
+ DEBUG(dbgs() << " Potential instructions to be placed in: "
+ << targetNode->getName() << " (at beginning)\n");
+ pushValueIntoNode(sourceNode, targetNode);
+ instrumentNode = targetNode;
+ nextSourceNode = NULL; // ... otherwise we'll just keep splitting
+ atBeginning = true;
+ }
+
+ // Somehow, splitting must have failed.
+ else {
+ errs() << "Instrumenting could not split a critical edge.\n";
+ DEBUG(dbgs() << " Couldn't split edge " << (*edge) << ".\n");
+ return;
+ }
+
+ // Insert instrumentation if this is a back or split edge
+ if( edge->getType() == BallLarusEdge::BACKEDGE ||
+ edge->getType() == BallLarusEdge::SPLITEDGE ) {
+ BLInstrumentationEdge* top =
+ (BLInstrumentationEdge*) edge->getPhonyRoot();
+ BLInstrumentationEdge* bottom =
+ (BLInstrumentationEdge*) edge->getPhonyExit();
+
+ assert( top->isInitialization() && " Top phony edge did not"
+ " contain a path number initialization.");
+ assert( bottom->isCounterIncrement() && " Bottom phony edge"
+ " did not contain a path counter increment.");
+
+ // split edge has yet to be initialized
+ if( !instrumentNode->getEndingPathNumber() ) {
+ instrumentNode->setStartingPathNumber(createIncrementConstant(0,32));
+ instrumentNode->setEndingPathNumber(createIncrementConstant(0,32));
+ }
+
+ BasicBlock::iterator insertPoint = atBeginning ?
+ instrumentNode->getBlock()->getFirstNonPHI() :
+ instrumentNode->getBlock()->getTerminator();
+
+ // add information from the bottom edge, if it exists
+ if( bottom->getIncrement() ) {
+ Value* newpn =
+ BinaryOperator::Create(Instruction::Add,
+ instrumentNode->getStartingPathNumber(),
+ createIncrementConstant(bottom),
+ "pathNumber", insertPoint);
+ instrumentNode->setEndingPathNumber(newpn);
+ }
+
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ insertPoint, dag);
+
+ if( atBeginning )
+ instrumentNode->setStartingPathNumber(createIncrementConstant(top));
+
+ instrumentNode->setEndingPathNumber(createIncrementConstant(top));
+
+ // Check for path counter increments
+ if( top->isCounterIncrement() ) {
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ instrumentNode->getBlock()->getTerminator(),dag);
+ instrumentNode->setEndingPathNumber(0);
+ }
+ }
+
+ // Insert instrumentation if this is a normal edge
+ else {
+ BasicBlock::iterator insertPoint = atBeginning ?
+ instrumentNode->getBlock()->getFirstNonPHI() :
+ instrumentNode->getBlock()->getTerminator();
+
+ if( edge->isInitialization() ) { // initialize path number
+ instrumentNode->setEndingPathNumber(createIncrementConstant(edge));
+ } else if( edge->getIncrement() ) {// increment path number
+ Value* newpn =
+ BinaryOperator::Create(Instruction::Add,
+ instrumentNode->getStartingPathNumber(),
+ createIncrementConstant(edge),
+ "pathNumber", insertPoint);
+ instrumentNode->setEndingPathNumber(newpn);
+
+ if( atBeginning )
+ instrumentNode->setStartingPathNumber(newpn);
+ }
+
+ // Check for path counter increments
+ if( edge->isCounterIncrement() ) {
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ insertPoint, dag);
+ instrumentNode->setEndingPathNumber(0);
+ }
+ }
+
+ // Push it along
+ if (nextSourceNode && instrumentNode->getEndingPathNumber())
+ pushValueIntoNode(instrumentNode, nextSourceNode);
+
+ // Add all the successors
+ for( BLEdgeIterator next = targetNode->succBegin(),
+ end = targetNode->succEnd(); next != end; next++ ) {
+ // So long as it is un-instrumented, add it to the list
+ if( !((BLInstrumentationEdge*)(*next))->hasInstrumentation() )
+ insertInstrumentationStartingAt((BLInstrumentationEdge*)*next,dag);
+ else
+ DEBUG(dbgs() << " Edge " << *(BLInstrumentationEdge*)(*next)
+ << " already instrumented.\n");
+ }
+}
+
+// Inserts instrumentation according to the marked edges in dag. Phony edges
+// must be unlinked from the DAG, but accessible from the backedges. Dag
+// must have initializations, path number increments, and counter increments
+// present.
+//
+// Counter storage is created here.
+void PathProfiler::insertInstrumentation(
+ BLInstrumentationDag& dag, Module &M) {
+
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) dag.getExitRootEdge();
+ insertInstrumentationStartingAt(exitRootEdge, &dag);
+
+ // Iterate through each call edge and apply the appropriate hash increment
+ // and decrement functions
+ BLEdgeVector callEdges = dag.getCallPhonyEdges();
+ for( BLEdgeIterator edge = callEdges.begin(),
+ end = callEdges.end(); edge != end; edge++ ) {
+ BLInstrumentationNode* node =
+ (BLInstrumentationNode*)(*edge)->getSource();
+ BasicBlock::iterator insertPoint = node->getBlock()->getFirstNonPHI();
+
+ // Find the first function call
+ while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call )
+ insertPoint++;
+
+ DEBUG(dbgs() << "\nInstrumenting method call block '"
+ << node->getBlock()->getNameStr() << "'\n");
+ DEBUG(dbgs() << " Path number initialized: "
+ << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n");
+
+ Value* newpn;
+ if( node->getStartingPathNumber() ) {
+ long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
+ if ( inc )
+ newpn = BinaryOperator::Create(Instruction::Add,
+ node->getStartingPathNumber(),
+ createIncrementConstant(inc,32),
+ "pathNumber", insertPoint);
+ else
+ newpn = node->getStartingPathNumber();
+ } else {
+ newpn = (Value*)createIncrementConstant(
+ ((BLInstrumentationEdge*)(*edge))->getIncrement(), 32);
+ }
+
+ insertCounterIncrement(newpn, insertPoint, &dag);
+ insertCounterIncrement(newpn, node->getBlock()->getTerminator(),
+ &dag, false);
+ }
+}
+
+// Analyzes the function for Ball-Larus path profiling, and inserts code.
+void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit,
+ Function &F, Module &M) {
+ // Build DAG from CFG
+ BLInstrumentationDag dag = BLInstrumentationDag(F);
+ dag.init();
+
+ // give each path a unique integer value
+ dag.calculatePathNumbers();
+
+ // modify path increments to increase the efficiency
+ // of instrumentation
+ dag.calculateSpanningTree();
+ dag.calculateChordIncrements();
+ dag.pushInitialization();
+ dag.pushCounters();
+ dag.unlinkPhony();
+
+ // potentially generate .dot graph for the dag
+ if (DotPathDag)
+ dag.generateDotGraph ();
+
+  // Should we store the information in an array or hash table?
+ if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) {
+ const Type* t = ArrayType::get(Type::getInt32Ty(*Context),
+ dag.getNumberOfPaths());
+
+ dag.setCounterArray(new GlobalVariable(M, t, false,
+ GlobalValue::InternalLinkage,
+ Constant::getNullValue(t), ""));
+ }
+
+ insertInstrumentation(dag, M);
+
+ // Add to global function reference table
+ unsigned type;
+ const Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context);
+
+ if( dag.getNumberOfPaths() <= HASH_THRESHHOLD )
+ type = ProfilingArray;
+ else
+ type = ProfilingHash;
+
+ std::vector<Constant*> entryArray(3);
+ entryArray[0] = createIncrementConstant(type,32);
+ entryArray[1] = createIncrementConstant(dag.getNumberOfPaths(),32);
+ entryArray[2] = dag.getCounterArray() ?
+ ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) :
+ Constant::getNullValue(voidPtr);
+
+ const StructType* at = ftEntryTypeBuilder::get(*Context);
+ ConstantStruct* functionEntry =
+ (ConstantStruct*)ConstantStruct::get(at, entryArray);
+ ftInit.push_back(functionEntry);
+}
+
+// Output the bitcode if we want to observe instrumentation changes
+#define PRINT_MODULE dbgs() << \
+ "\n\n============= MODULE BEGIN ===============\n" << M << \
+ "\n============== MODULE END ================\n"
+
+bool PathProfiler::runOnModule(Module &M) {
+ Context = &M.getContext();
+
+ DEBUG(dbgs()
+ << "****************************************\n"
+ << "****************************************\n"
+ << "** **\n"
+ << "** PATH PROFILING INSTRUMENTATION **\n"
+ << "** **\n"
+ << "****************************************\n"
+ << "****************************************\n");
+
+ // No main, no instrumentation!
+ Function *Main = M.getFunction("main");
+
+  // Using Fortran? ... this kind of works
+ if (!Main)
+ Main = M.getFunction("MAIN__");
+
+ if (!Main) {
+ errs() << "WARNING: cannot insert path profiling into a module"
+ << " with no main function!\n";
+ return false;
+ }
+
+ BasicBlock::iterator insertPoint = Main->getEntryBlock().getFirstNonPHI();
+
+ llvmIncrementHashFunction = M.getOrInsertFunction(
+ "llvm_increment_path_count",
+ Type::getVoidTy(*Context), // return type
+ Type::getInt32Ty(*Context), // function number
+ Type::getInt32Ty(*Context), // path number
+ NULL );
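+  // (Roughly 'void llvm_increment_path_count(u32 fn, u32 path)' on the
+  // runtime side; the decrement variant is used with call edges to cancel
+  // the provisional count taken before a call. See insertInstrumentation.)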
+
+ llvmDecrementHashFunction = M.getOrInsertFunction(
+ "llvm_decrement_path_count",
+ Type::getVoidTy(*Context), // return type
+ Type::getInt32Ty(*Context), // function number
+ Type::getInt32Ty(*Context), // path number
+ NULL );
+
+ std::vector<Constant*> ftInit;
+ unsigned functionNumber = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+ if (F->isDeclaration())
+ continue;
+
+ DEBUG(dbgs() << "Function: " << F->getNameStr() << "\n");
+ functionNumber++;
+
+ // set function number
+ currentFunctionNumber = functionNumber;
+ runOnFunction(ftInit, *F, M);
+ }
+
+ const Type *t = ftEntryTypeBuilder::get(*Context);
+ const ArrayType* ftArrayType = ArrayType::get(t, ftInit.size());
+ Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit);
+
+ DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n");
+
+ GlobalVariable* functionTable =
+ new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage,
+ ftInitConstant, "functionPathTable");
+ const Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0);
+ InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable,
+ PointerType::getUnqual(eltType));
+
+ DEBUG(PRINT_MODULE);
+
+ return true;
+}
+
+// If this edge is a critical edge, then inserts a node at this edge.
+// This edge becomes the first edge, and a new BallLarusEdge is created.
+// Returns true if the edge was split
+bool PathProfiler::splitCritical(BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag) {
+ unsigned succNum = edge->getSuccessorNumber();
+ BallLarusNode* sourceNode = edge->getSource();
+ BallLarusNode* targetNode = edge->getTarget();
+ BasicBlock* sourceBlock = sourceNode->getBlock();
+ BasicBlock* targetBlock = targetNode->getBlock();
+
+ if(sourceBlock == NULL || targetBlock == NULL
+ || sourceNode->getNumberSuccEdges() <= 1
+ || targetNode->getNumberPredEdges() == 1 ) {
+ return(false);
+ }
+
+ TerminatorInst* terminator = sourceBlock->getTerminator();
+
+ if( SplitCriticalEdge(terminator, succNum, this, false)) {
+ BasicBlock* newBlock = terminator->getSuccessor(succNum);
+ dag->splitUpdate(edge, newBlock);
+ return(true);
+ } else
+ return(false);
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 1a30e9b..b57bbf6 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -22,12 +22,13 @@
#include "llvm/Module.h"
void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
- GlobalValue *Array) {
+ GlobalValue *Array,
+ PointerType *arrayType) {
LLVMContext &Context = MainFn->getContext();
- const Type *ArgVTy =
+ const Type *ArgVTy =
PointerType::getUnqual(Type::getInt8PtrTy(Context));
- const PointerType *UIntPtr =
- Type::getInt32PtrTy(Context);
+ const PointerType *UIntPtr = arrayType ? arrayType :
+ Type::getInt32PtrTy(Context);
Module &M = *MainFn->getParent();
Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context),
Type::getInt32Ty(Context),
@@ -71,9 +72,9 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
case 2:
AI = MainFn->arg_begin(); ++AI;
if (AI->getType() != ArgVTy) {
- Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
+ Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
false);
- InitCall->setArgOperand(1,
+ InitCall->setArgOperand(1,
CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
} else {
InitCall->setArgOperand(1, AI);
@@ -93,7 +94,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
}
opcode = CastInst::getCastOpcode(AI, true,
Type::getInt32Ty(Context), true);
- InitCall->setArgOperand(0,
+ InitCall->setArgOperand(0,
CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
"argc.cast", InitCall));
} else {
@@ -106,9 +107,10 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
}
void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
- GlobalValue *CounterArray) {
+ GlobalValue *CounterArray, bool beginning) {
// Insert the increment after any alloca or PHI instructions...
- BasicBlock::iterator InsertPos = BB->getFirstNonPHI();
+ BasicBlock::iterator InsertPos = beginning ? BB->getFirstNonPHI() :
+ BB->getTerminator();
while (isa<AllocaInst>(InsertPos))
++InsertPos;
@@ -118,7 +120,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
std::vector<Constant*> Indices(2);
Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
- Constant *ElementPtr =
+ Constant *ElementPtr =
ConstantExpr::getGetElementPtr(CounterArray, &Indices[0],
Indices.size());
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
index 94efffe..a76e357 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
@@ -21,11 +21,14 @@ namespace llvm {
class Function;
class GlobalValue;
class BasicBlock;
+ class PointerType;
void InsertProfilingInitCall(Function *MainFn, const char *FnName,
- GlobalValue *Arr = 0);
+ GlobalValue *Arr = 0,
+ PointerType *arrayType = 0);
void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
- GlobalValue *CounterArray);
+ GlobalValue *CounterArray,
+ bool beginning = true);
}
#endif
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index ada086e..a5adb5e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -33,7 +33,9 @@ STATISTIC(NumRemoved, "Number of instructions removed");
namespace {
struct ADCE : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- ADCE() : FunctionPass(ID) {}
+ ADCE() : FunctionPass(ID) {
+ initializeADCEPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function& F);
@@ -45,7 +47,7 @@ namespace {
}
char ADCE::ID = 0;
-INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false);
+INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
bool ADCE::runOnFunction(Function& F) {
SmallPtrSet<Instruction*, 128> alive;
diff --git a/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
index b144678..cee5502 100644
--- a/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -41,7 +41,9 @@ STATISTIC(NumMoved, "Number of basic blocks moved");
namespace {
struct BlockPlacement : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- BlockPlacement() : FunctionPass(ID) {}
+ BlockPlacement() : FunctionPass(ID) {
+ initializeBlockPlacementPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -74,8 +76,11 @@ namespace {
}
char BlockPlacement::ID = 0;
-INITIALIZE_PASS(BlockPlacement, "block-placement",
- "Profile Guided Basic Block Placement", false, false);
+INITIALIZE_PASS_BEGIN(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false)
FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
index e07b761..9536939 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -22,6 +22,8 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
@@ -31,6 +33,7 @@
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
@@ -39,31 +42,59 @@
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/ValueHandle.h"
using namespace llvm;
using namespace llvm::PatternMatch;
+STATISTIC(NumBlocksElim, "Number of blocks eliminated");
+STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
+ "sunken Cmps");
+STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
+ "of sunken Casts");
+STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
+ "computations were sunk");
+STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+
static cl::opt<bool>
CriticalEdgeSplit("cgp-critical-edge-splitting",
cl::desc("Split critical edges during codegen prepare"),
- cl::init(true), cl::Hidden);
+ cl::init(false), cl::Hidden);
namespace {
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
+ DominatorTree *DT;
ProfileInfo *PFI;
+
+ /// CurInstIterator - As we scan instructions optimizing them, this is the
+ /// next instruction to optimize. Xforms that can invalidate this should
+ /// update it.
+ BasicBlock::iterator CurInstIterator;
/// BackEdges - Keep a set of all the loop back edges.
///
SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges;
+
+ // Keeps track of non-local addresses that have been sunk into a block. This
+ // allows us to avoid inserting duplicate code for blocks with multiple
+ // load/stores of the same address.
+ DenseMap<Value*, Value*> SunkAddrs;
+
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetLowering *tli = 0)
- : FunctionPass(ID), TLI(tli) {}
+ : FunctionPass(ID), TLI(tli) {
+ initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
}
@@ -76,10 +107,9 @@ namespace {
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void EliminateMostlyEmptyBlock(BasicBlock *BB);
bool OptimizeBlock(BasicBlock &BB);
- bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy,
- DenseMap<Value*,Value*> &SunkAddrs);
- bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
- DenseMap<Value*,Value*> &SunkAddrs);
+ bool OptimizeInst(Instruction *I);
+ bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy);
+ bool OptimizeInlineAsmInst(CallInst *CS);
bool OptimizeCallInst(CallInst *CI);
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
@@ -89,7 +119,7 @@ namespace {
char CodeGenPrepare::ID = 0;
INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false);
+ "Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
return new CodeGenPrepare(TLI);
@@ -108,13 +138,16 @@ void CodeGenPrepare::findLoopBackEdges(const Function &F) {
bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
+ DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
// First pass, eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
- // Now find loop back edges.
- findLoopBackEdges(F);
+ // Now find loop back edges, but only if they are being used to decide which
+ // critical edges to split.
+ if (CriticalEdgeSplit)
+ findLoopBackEdges(F);
bool MadeChange = true;
while (MadeChange) {
@@ -123,6 +156,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
MadeChange |= OptimizeBlock(*BB);
EverMadeChange |= MadeChange;
}
+
+ SunkAddrs.clear();
+
return EverMadeChange;
}
@@ -297,11 +333,19 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
BB->replaceAllUsesWith(DestBB);
+ if (DT) {
+ BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
+ BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
+ BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
+ DT->changeImmediateDominator(DestBB, NewIDom);
+ DT->eraseNode(BB);
+ }
if (PFI) {
PFI->replaceAllUses(BB, DestBB);
PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
}
BB->eraseFromParent();
+ ++NumBlocksElim;
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
@@ -480,6 +524,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
// Replace a use of the cast with a use of the new cast.
TheUse = InsertedCast;
+ ++NumCastUses;
}
// If we removed all uses, nuke the cast.
@@ -537,6 +582,7 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
// Replace a use of the cmp with a use of the new cmp.
TheUse = InsertedCmp;
+ ++NumCmpUses;
}
// If we removed all uses, nuke the cmp.
@@ -563,14 +609,45 @@ protected:
} // end anonymous namespace
bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+ BasicBlock *BB = CI->getParent();
+
+ // Lower inline assembly if we can.
+  // If we found an inline asm expression, and if the target knows how to
+ // lower it to normal LLVM code, do so now.
+ if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
+ if (TLI->ExpandInlineAsm(CI)) {
+ // Avoid invalidating the iterator.
+ CurInstIterator = BB->begin();
+ // Avoid processing instructions out of order, which could cause
+ // reuse before a value is defined.
+ SunkAddrs.clear();
+ return true;
+ }
+ // Sink address computing for memory operands into the block.
+ if (OptimizeInlineAsmInst(CI))
+ return true;
+ }
+
// Lower all uses of llvm.objectsize.*
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
const Type *ReturnTy = CI->getType();
Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
- CI->replaceAllUsesWith(RetVal);
- CI->eraseFromParent();
+
+ // Substituting this can cause recursive simplifications, which can
+ // invalidate our iterator. Use a WeakVH to hold onto it in case this
+ // happens.
+ WeakVH IterHandle(CurInstIterator);
+
+ ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT);
+
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ if (IterHandle != CurInstIterator) {
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ }
return true;
}
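
The WeakVH guard above generalizes: take a non-owning handle on the current position before running a mutation that may delete it, then fall back to a safe position if the handle died. A toy model using std::weak_ptr in place of llvm::WeakVH (mutateAndRecover and the list-of-shared_ptr layout are invented for illustration):

#include <list>
#include <memory>

using IntList = std::list<std::shared_ptr<int>>;

void mutateAndRecover(IntList &L, IntList::iterator &Cur,
                      void (*Mutate)(IntList &)) {
  std::weak_ptr<int> Guard = *Cur;  // watches *Cur without keeping it alive
  Mutate(L);                        // may erase the element Cur points at
  if (Guard.expired())              // it did: Cur now dangles...
    Cur = L.begin();                // ...so restart from a known-good spot
}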
@@ -588,6 +665,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
CodeGenPrepareFortifiedLibCalls Simplifier;
return Simplifier.fold(CI, TD);
}
+
//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//
@@ -610,13 +688,69 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
- const Type *AccessTy,
- DenseMap<Value*,Value*> &SunkAddrs) {
- // Figure out what addressing mode will be built up for this operation.
+ const Type *AccessTy) {
+ Value *Repl = Addr;
+
+ // Try to collapse single-value PHI nodes. This is necessary to undo
+ // unprofitable PRE transformations.
+ SmallVector<Value*, 8> worklist;
+ SmallPtrSet<Value*, 16> Visited;
+ worklist.push_back(Addr);
+
+ // Use a worklist to iteratively look through PHI nodes, and ensure that
+  // the addressing modes obtained from the non-PHI roots of the graph
+  // are all equivalent.
+ Value *Consensus = 0;
+ unsigned NumUses = 0;
SmallVector<Instruction*, 16> AddrModeInsts;
- ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
- AddrModeInsts, *TLI);
-
+ ExtAddrMode AddrMode;
+ while (!worklist.empty()) {
+ Value *V = worklist.back();
+ worklist.pop_back();
+
+ // Break use-def graph loops.
+ if (Visited.count(V)) {
+ Consensus = 0;
+ break;
+ }
+
+ Visited.insert(V);
+
+ // For a PHI node, push all of its incoming values.
+ if (PHINode *P = dyn_cast<PHINode>(V)) {
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
+ worklist.push_back(P->getIncomingValue(i));
+ continue;
+ }
+
+ // For non-PHIs, determine the addressing mode being computed.
+ SmallVector<Instruction*, 16> NewAddrModeInsts;
+ ExtAddrMode NewAddrMode =
+      AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
+ NewAddrModeInsts, *TLI);
+
+ // Ensure that the obtained addressing mode is equivalent to that obtained
+ // for all other roots of the PHI traversal. Also, when choosing one
+ // such root as representative, select the one with the most uses in order
+ // to keep the cost modeling heuristics in AddressingModeMatcher applicable.
+ if (!Consensus || NewAddrMode == AddrMode) {
+ if (V->getNumUses() > NumUses) {
+ Consensus = V;
+ NumUses = V->getNumUses();
+ AddrMode = NewAddrMode;
+ AddrModeInsts = NewAddrModeInsts;
+ }
+ continue;
+ }
+
+ Consensus = 0;
+ break;
+ }
+
+ // If the addressing mode couldn't be determined, or if multiple different
+ // ones were determined, bail out now.
+ if (!Consensus) return false;
+
  // Check to see if any of the instructions subsumed by this addr mode are
// non-local to I's BB.
bool AnyNonLocal = false;
@@ -719,60 +853,39 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
    SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr", InsertPt);
}
- MemoryInst->replaceUsesOfWith(Addr, SunkAddr);
+ MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
- if (Addr->use_empty()) {
- RecursivelyDeleteTriviallyDeadInstructions(Addr);
+ if (Repl->use_empty()) {
+ RecursivelyDeleteTriviallyDeadInstructions(Repl);
// This address is now available for reassignment, so erase the table entry;
// we don't want to match some completely different instruction.
SunkAddrs[Addr] = 0;
}
+ ++NumMemoryInsts;
return true;
}
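
The consensus loop above is a reusable shape: expand PHI-like nodes into their inputs, bail on cycles, and require every leaf to agree. An LLVM-free sketch under invented names (Node, consensusOverLeaves):

#include <optional>
#include <unordered_set>
#include <vector>

struct Node {
  std::vector<Node *> Inputs;  // non-empty => PHI-like: defer to inputs
  int Value = 0;               // leaf payload (stands in for ExtAddrMode)
};

std::optional<int> consensusOverLeaves(Node *Root) {
  std::vector<Node *> Worklist{Root};
  std::unordered_set<Node *> Visited;
  std::optional<int> Consensus;
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      return std::nullopt;  // revisit => use-def cycle: give up
    if (!N->Inputs.empty()) {
      Worklist.insert(Worklist.end(), N->Inputs.begin(), N->Inputs.end());
      continue;
    }
    if (Consensus && *Consensus != N->Value)
      return std::nullopt;  // two roots disagree: no consensus
    Consensus = N->Value;
  }
  return Consensus;
}

(The real code additionally keeps the most-used root as the representative; the sketch drops that refinement.)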
/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computing into the block when
/// possible / profitable.
-bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
- DenseMap<Value*,Value*> &SunkAddrs) {
+bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;
- InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
-
- // Do a prepass over the constraints, canonicalizing them, and building up the
- // ConstraintOperands list.
- std::vector<InlineAsm::ConstraintInfo>
- ConstraintInfos = IA->ParseConstraints();
-
- /// ConstraintOperands - Information about all of the constraints.
- std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
- unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
- for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
- ConstraintOperands.
- push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
- TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();
-
- // Compute the value type for each operand.
- switch (OpInfo.Type) {
- case InlineAsm::isOutput:
- if (OpInfo.isIndirect)
- OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
- break;
- case InlineAsm::isInput:
- OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
- break;
- case InlineAsm::isClobber:
- // Nothing to do.
- break;
- }
+ TargetLowering::AsmOperandInfoVector
+ TargetConstraints = TLI->ParseConstraints(CS);
+ unsigned ArgNo = 0;
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
// Compute the constraint code and ConstraintType to use.
TLI->ComputeConstraintToUse(OpInfo, SDValue());
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
- Value *OpVal = OpInfo.CallOperandVal;
- MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
- }
+ Value *OpVal = CS->getArgOperand(ArgNo++);
+ MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
+ } else if (OpInfo.Type == InlineAsm::isInput)
+ ArgNo++;
}
return MadeChange;
@@ -794,7 +907,9 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
// If the load has other users and the truncate is not free, this probably
// isn't worthwhile.
if (!LI->hasOneUse() &&
- TLI && !TLI->isTruncateFree(I->getType(), LI->getType()))
+ TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
+ !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
+ !TLI->isTruncateFree(I->getType(), LI->getType()))
return false;
// Check whether the target supports casts folded into loads.
@@ -812,13 +927,14 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
// can fold it.
I->removeFromParent();
I->insertAfter(LI);
+ ++NumExtsMoved;
return true;
}
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
BasicBlock *DefBB = I->getParent();
- // If both result of the {s|z}xt and its source are live out, rewrite all
+ // If the result of a {s|z}ext and its source are both live out, rewrite all
// other uses of the source with result of extension.
Value *Src = I->getOperand(0);
if (Src->hasOneUse())
@@ -883,13 +999,83 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
// Replace a use of the {s|z}ext source with a use of the result.
TheUse = InsertedTrunc;
-
+ ++NumExtUses;
MadeChange = true;
}
return MadeChange;
}
+bool CodeGenPrepare::OptimizeInst(Instruction *I) {
+ if (PHINode *P = dyn_cast<PHINode>(I)) {
+ // It is possible for very late stage optimizations (such as SimplifyCFG)
+ // to introduce PHI nodes too late to be cleaned up. If we detect such a
+ // trivial PHI, go ahead and zap it here.
+ if (Value *V = SimplifyInstruction(P)) {
+ P->replaceAllUsesWith(V);
+ P->eraseFromParent();
+ ++NumPHIsElim;
+ return true;
+ }
+ return false;
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // If the source of the cast is a constant, then this should have
+ // already been constant folded. The only reason NOT to constant fold
+ // it is if something (e.g. LSR) was careful to place the constant
+    // evaluation in a block other than the one that uses it (e.g. to hoist
+ // the address of globals out of a loop). If this is the case, we don't
+ // want to forward-subst the cast.
+ if (isa<Constant>(CI->getOperand(0)))
+ return false;
+
+ if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
+ return true;
+
+ if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+ bool MadeChange = MoveExtToFormExtLoad(I);
+ return MadeChange | OptimizeExtUses(I);
+ }
+ return false;
+ }
+
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ return OptimizeCmpExpression(CI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (TLI)
+ return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+ return false;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (TLI)
+ return OptimizeMemoryInst(I, SI->getOperand(1),
+ SI->getOperand(0)->getType());
+ return false;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ if (GEPI->hasAllZeroIndices()) {
+      // The GEP operand must be a pointer, so must its result -> BitCast
+ Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NC);
+ GEPI->eraseFromParent();
+ ++NumGEPsElim;
+ OptimizeInst(NC);
+ return true;
+ }
+ return false;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ return OptimizeCallInst(CI);
+
+ return false;
+}
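
The GEP-with-all-zero-indices case relies on a simple fact: indexing by zero never moves the pointer, so the GEP result aliases its base exactly and only the type changes. A runnable illustration:

#include <cassert>
#include <cstdint>

struct Inner { int32_t X; };
struct Outer { Inner I; };

int main() {
  Outer O{};
  // &O.I.X is what "getelementptr &O, 0, 0, 0" computes: the same address
  // as &O, just at a different type -- exactly what a bitcast expresses.
  assert(static_cast<void *>(&O.I.X) == static_cast<void *>(&O));
}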
+
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
@@ -908,74 +1094,11 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
}
}
- // Keep track of non-local addresses that have been sunk into this block.
- // This allows us to avoid inserting duplicate code for blocks with multiple
- // load/stores of the same address.
- DenseMap<Value*, Value*> SunkAddrs;
-
- for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
- Instruction *I = BBI++;
+ SunkAddrs.clear();
- if (CastInst *CI = dyn_cast<CastInst>(I)) {
- // If the source of the cast is a constant, then this should have
- // already been constant folded. The only reason NOT to constant fold
- // it is if something (e.g. LSR) was careful to place the constant
- // evaluation in a block other than then one that uses it (e.g. to hoist
- // the address of globals out of a loop). If this is the case, we don't
- // want to forward-subst the cast.
- if (isa<Constant>(CI->getOperand(0)))
- continue;
-
- bool Change = false;
- if (TLI) {
- Change = OptimizeNoopCopyExpression(CI, *TLI);
- MadeChange |= Change;
- }
-
- if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) {
- MadeChange |= MoveExtToFormExtLoad(I);
- MadeChange |= OptimizeExtUses(I);
- }
- } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
- MadeChange |= OptimizeCmpExpression(CI);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (TLI)
- MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
- SunkAddrs);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (TLI)
- MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
- SI->getOperand(0)->getType(),
- SunkAddrs);
- } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
- if (GEPI->hasAllZeroIndices()) {
- /// The GEP operand must be a pointer, so must its result -> BitCast
- Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
- GEPI->getName(), GEPI);
- GEPI->replaceAllUsesWith(NC);
- GEPI->eraseFromParent();
- MadeChange = true;
- BBI = NC;
- }
- } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
- // If we found an inline asm expession, and if the target knows how to
- // lower it to normal LLVM code, do so now.
- if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
- if (TLI->ExpandInlineAsm(CI)) {
- BBI = BB.begin();
- // Avoid processing instructions out of order, which could cause
- // reuse before a value is defined.
- SunkAddrs.clear();
- } else
- // Sink address computing for memory operands into the block.
- MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
- } else {
- // Other CallInst optimizations that don't need to muck with the
- // enclosing iterator here.
- MadeChange |= OptimizeCallInst(CI);
- }
- }
- }
+ CurInstIterator = BB.begin();
+ for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
+ MadeChange |= OptimizeInst(CurInstIterator++);
return MadeChange;
}
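
The rewritten loop leans on one idiom: advance the shared iterator before visiting, so a callee may erase the current instruction or reset CurInstIterator without breaking iteration. A toy equivalent over std::list (Walker is invented):

#include <list>

struct Walker {
  std::list<int> Items;
  std::list<int>::iterator Cur;

  void visit(std::list<int>::iterator It) {
    if (*It % 2 == 0) {     // some rewrite fired...
      Items.erase(It);      // safe: Cur has already moved past It
      Cur = Items.begin();  // ...restart, as OptimizeCallInst does
    }
  }

  void walk() {
    Cur = Items.begin();
    while (Cur != Items.end())
      visit(Cur++);  // mirrors "OptimizeInst(CurInstIterator++)"
  }
};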
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
index a0ea369..664c3f6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -34,7 +34,9 @@ STATISTIC(NumInstKilled, "Number of instructions killed");
namespace {
struct ConstantPropagation : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- ConstantPropagation() : FunctionPass(ID) {}
+ ConstantPropagation() : FunctionPass(ID) {
+ initializeConstantPropagationPass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
@@ -46,7 +48,7 @@ namespace {
char ConstantPropagation::ID = 0;
INITIALIZE_PASS(ConstantPropagation, "constprop",
- "Simple constant propagation", false, false);
+ "Simple constant propagation", false, false)
FunctionPass *llvm::createConstantPropagationPass() {
return new ConstantPropagation();
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 0d4e45d..be12973 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -16,6 +16,7 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -30,18 +31,20 @@ STATISTIC(NumCmps, "Number of comparisons propagated");
namespace {
class CorrelatedValuePropagation : public FunctionPass {
LazyValueInfo *LVI;
-
+
bool processSelect(SelectInst *SI);
bool processPHI(PHINode *P);
bool processMemAccess(Instruction *I);
bool processCmp(CmpInst *C);
-
+
public:
static char ID;
- CorrelatedValuePropagation(): FunctionPass(ID) { }
-
+ CorrelatedValuePropagation(): FunctionPass(ID) {
+ initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry());
+ }
+
bool runOnFunction(Function &F);
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LazyValueInfo>();
}
@@ -49,8 +52,11 @@ namespace {
}
char CorrelatedValuePropagation::ID = 0;
-INITIALIZE_PASS(CorrelatedValuePropagation, "correlated-propagation",
- "Value Propagation", false, false);
+INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation",
+ "Value Propagation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_END(CorrelatedValuePropagation, "correlated-propagation",
+ "Value Propagation", false, false)
// Public interface to the Value Propagation pass
Pass *llvm::createCorrelatedValuePropagationPass() {
@@ -60,46 +66,51 @@ Pass *llvm::createCorrelatedValuePropagationPass() {
bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
if (S->getType()->isVectorTy()) return false;
if (isa<Constant>(S->getOperand(0))) return false;
-
+
Constant *C = LVI->getConstant(S->getOperand(0), S->getParent());
if (!C) return false;
-
+
ConstantInt *CI = dyn_cast<ConstantInt>(C);
if (!CI) return false;
-
- S->replaceAllUsesWith(S->getOperand(CI->isOne() ? 1 : 2));
+
+ Value *ReplaceWith = S->getOperand(1);
+ Value *Other = S->getOperand(2);
+ if (!CI->isOne()) std::swap(ReplaceWith, Other);
+ if (ReplaceWith == S) ReplaceWith = UndefValue::get(S->getType());
+
+ S->replaceAllUsesWith(ReplaceWith);
S->eraseFromParent();
++NumSelects;
-
+
return true;
}
bool CorrelatedValuePropagation::processPHI(PHINode *P) {
bool Changed = false;
-
+
BasicBlock *BB = P->getParent();
for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
Value *Incoming = P->getIncomingValue(i);
if (isa<Constant>(Incoming)) continue;
-
+
Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i),
P->getIncomingBlock(i),
BB);
if (!C) continue;
-
+
P->setIncomingValue(i, C);
Changed = true;
}
-
- if (Value *ConstVal = P->hasConstantValue()) {
- P->replaceAllUsesWith(ConstVal);
+
+ if (Value *V = SimplifyInstruction(P)) {
+ P->replaceAllUsesWith(V);
P->eraseFromParent();
Changed = true;
}
-
+
++NumPhis;
-
+
return Changed;
}
@@ -109,12 +120,12 @@ bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
Pointer = L->getPointerOperand();
else
Pointer = cast<StoreInst>(I)->getPointerOperand();
-
+
if (isa<Constant>(Pointer)) return false;
-
+
Constant *C = LVI->getConstant(Pointer, I->getParent());
if (!C) return false;
-
+
++NumMemAccess;
I->replaceUsesOfWith(Pointer, C);
return true;
@@ -130,32 +141,32 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
if (isa<Instruction>(Op0) &&
cast<Instruction>(Op0)->getParent() == C->getParent())
return false;
-
+
Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
if (!Op1) return false;
-
+
pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
if (PI == PE) return false;
-
- LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
+
+ LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
C->getOperand(0), Op1, *PI, C->getParent());
if (Result == LazyValueInfo::Unknown) return false;
++PI;
while (PI != PE) {
- LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
+ LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
C->getOperand(0), Op1, *PI, C->getParent());
if (Res != Result) return false;
++PI;
}
-
+
++NumCmps;
-
+
if (Result == LazyValueInfo::True)
C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
else
C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
-
+
C->eraseFromParent();
return true;
@@ -163,9 +174,9 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
bool CorrelatedValuePropagation::runOnFunction(Function &F) {
LVI = &getAnalysis<LazyValueInfo>();
-
+
bool FnChanged = false;
-
+
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
bool BBChanged = false;
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
@@ -187,14 +198,9 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
break;
}
}
-
- // Propagating correlated values might leave cruft around.
- // Try to clean it up before we continue.
- if (BBChanged)
- SimplifyInstructionsInBlock(FI);
-
+
FnChanged |= BBChanged;
}
-
+
return FnChanged;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index 87ea803..dbb68f3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -35,7 +35,9 @@ namespace {
//
struct DeadInstElimination : public BasicBlockPass {
static char ID; // Pass identification, replacement for typeid
- DeadInstElimination() : BasicBlockPass(ID) {}
+ DeadInstElimination() : BasicBlockPass(ID) {
+ initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnBasicBlock(BasicBlock &BB) {
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
@@ -57,7 +59,7 @@ namespace {
char DeadInstElimination::ID = 0;
INITIALIZE_PASS(DeadInstElimination, "die",
- "Dead Instruction Elimination", false, false);
+ "Dead Instruction Elimination", false, false)
Pass *llvm::createDeadInstEliminationPass() {
return new DeadInstElimination();
@@ -70,7 +72,9 @@ namespace {
//
struct DCE : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- DCE() : FunctionPass(ID) {}
+ DCE() : FunctionPass(ID) {
+ initializeDCEPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -81,7 +85,7 @@ namespace {
}
char DCE::ID = 0;
-INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false);
+INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
bool DCE::runOnFunction(Function &F) {
// Start out with all of the instructions in the worklist...
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index c8fd9d9..867a06a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -19,17 +19,20 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumFastStores, "Number of stores deleted");
@@ -37,58 +40,107 @@ STATISTIC(NumFastOther , "Number of other instrs removed");
namespace {
struct DSE : public FunctionPass {
- TargetData *TD;
+ AliasAnalysis *AA;
+ MemoryDependenceAnalysis *MD;
static char ID; // Pass identification, replacement for typeid
- DSE() : FunctionPass(ID) {}
+ DSE() : FunctionPass(ID), AA(0), MD(0) {
+ initializeDSEPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F) {
- bool Changed = false;
-
+ AA = &getAnalysis<AliasAnalysis>();
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
DominatorTree &DT = getAnalysis<DominatorTree>();
+ bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
// Only check non-dead blocks. Dead blocks may have strange pointer
// cycles that will confuse alias analysis.
if (DT.isReachableFromEntry(I))
Changed |= runOnBasicBlock(*I);
+
+ AA = 0; MD = 0;
return Changed;
}
bool runOnBasicBlock(BasicBlock &BB);
- bool handleFreeWithNonTrivialDependency(const CallInst *F,
- MemDepResult Dep);
+ bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
- bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize,
- BasicBlock::iterator &BBI,
- SmallPtrSet<Value*, 64> &deadPointers);
- void DeleteDeadInstruction(Instruction *I,
- SmallPtrSet<Value*, 64> *deadPointers = 0);
-
+ void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+ SmallPtrSet<Value*, 16> &DeadStackObjects);
- // getAnalysisUsage - We require post dominance frontiers (aka Control
- // Dependence Graph)
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<DominatorTree>();
AU.addRequired<AliasAnalysis>();
AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
AU.addPreserved<DominatorTree>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
-
- unsigned getPointerSize(Value *V) const;
};
}
char DSE::ID = 0;
-INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false);
+INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
-/// doesClobberMemory - Does this instruction clobber (write without reading)
-/// some memory?
-static bool doesClobberMemory(Instruction *I) {
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+/// If ValueSet is non-null, remove any deleted instructions from it as well.
+///
+static void DeleteDeadInstruction(Instruction *I,
+ MemoryDependenceAnalysis &MD,
+ SmallPtrSet<Value*, 16> *ValueSet = 0) {
+ SmallVector<Instruction*, 32> NowDeadInsts;
+
+ NowDeadInsts.push_back(I);
+ --NumFastOther;
+
+ // Before we touch this instruction, remove it from memdep!
+ do {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
+ ++NumFastOther;
+
+ // This instruction is dead, zap it, in stages. Start by removing it from
+ // MemDep, which needs to know the operands and needs it to be in the
+ // function.
+ MD.removeInstruction(DeadInst);
+
+ for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+ Value *Op = DeadInst->getOperand(op);
+ DeadInst->setOperand(op, 0);
+
+ // If this operand just became dead, add it to the NowDeadInsts list.
+ if (!Op->use_empty()) continue;
+
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ if (isInstructionTriviallyDead(OpI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+
+ if (ValueSet) ValueSet->erase(DeadInst);
+ } while (!NowDeadInsts.empty());
+}
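
DeleteDeadInstruction is a standard worklist deletion: unlink the dying instruction's operands, and anything whose use count drops to zero (and has no side effects) dies next. The same loop over a toy instruction model (Inst and deleteDeadTree are invented):

#include <vector>

struct Inst {
  std::vector<Inst *> Ops;   // operands this instruction uses
  int Uses = 0;              // how many instructions use this one
  bool SideEffects = false;  // "not trivially dead" stand-in
  bool Dead = false;
};

void deleteDeadTree(Inst *Root) {
  std::vector<Inst *> Worklist{Root};
  while (!Worklist.empty()) {
    Inst *I = Worklist.back();
    Worklist.pop_back();
    for (Inst *Op : I->Ops)
      if (--Op->Uses == 0 && !Op->SideEffects && !Op->Dead)
        Worklist.push_back(Op);  // operand just became dead too
    I->Ops.clear();  // mirrors "setOperand(op, 0)"
    I->Dead = true;  // mirrors eraseFromParent()
  }
}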
+
+
+/// hasMemoryWrite - Does this instruction write some memory? This only returns
+/// true for things that we can analyze with other helpers below.
+static bool hasMemoryWrite(Instruction *I) {
if (isa<StoreInst>(I))
return true;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
@@ -106,146 +158,296 @@ static bool doesClobberMemory(Instruction *I) {
return false;
}
-/// isElidable - If the value of this instruction and the memory it writes to is
-/// unused, may we delete this instrtction?
-static bool isElidable(Instruction *I) {
- assert(doesClobberMemory(I));
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
- return II->getIntrinsicID() != Intrinsic::lifetime_end;
+/// getLocForWrite - Return a Location stored to by the specified instruction.
+static AliasAnalysis::Location
+getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return AA.getLocation(SI);
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
+ // memcpy/memmove/memset.
+ AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
+ // If we don't have target data around, an unknown size in Location means
+ // that we should use the size of the pointee type. This isn't valid for
+    // memset/memcpy, which write more than an i8.
+ if (Loc.Size == AliasAnalysis::UnknownSize && AA.getTargetData() == 0)
+ return AliasAnalysis::Location();
+ return Loc;
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
+ if (II == 0) return AliasAnalysis::Location();
+
+ switch (II->getIntrinsicID()) {
+ default: return AliasAnalysis::Location(); // Unhandled intrinsic.
+ case Intrinsic::init_trampoline:
+ // If we don't have target data around, an unknown size in Location means
+ // that we should use the size of the pointee type. This isn't valid for
+ // init.trampoline, which writes more than an i8.
+ if (AA.getTargetData() == 0) return AliasAnalysis::Location();
+
+ // FIXME: We don't know the size of the trampoline, so we can't really
+ // handle it here.
+ return AliasAnalysis::Location(II->getArgOperand(0));
+ case Intrinsic::lifetime_end: {
+ uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+ return AliasAnalysis::Location(II->getArgOperand(1), Len);
+ }
+ }
+}
+
+/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
+/// instruction if any.
+static AliasAnalysis::Location
+getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
+ assert(hasMemoryWrite(Inst) && "Unknown instruction case");
+
+ // The only instructions that both read and write are the mem transfer
+ // instructions (memcpy/memmove).
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
+ return AA.getLocationForSource(MTI);
+ return AliasAnalysis::Location();
+}
+
+
+/// isRemovable - If the value of this instruction and the memory it writes to
+/// is unused, may we delete this instruction?
+static bool isRemovable(Instruction *I) {
+ // Don't remove volatile stores.
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return !SI->isVolatile();
- return true;
+
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate");
+ case Intrinsic::lifetime_end:
+    // Never remove dead lifetime_end intrinsics, e.g. because they may be
+    // followed by a free.
+ return false;
+ case Intrinsic::init_trampoline:
+ // Always safe to remove init_trampoline.
+ return true;
+
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ // Don't remove volatile memory intrinsics.
+ return !cast<MemIntrinsic>(II)->isVolatile();
+ }
}
-/// getPointerOperand - Return the pointer that is being clobbered.
-static Value *getPointerOperand(Instruction *I) {
- assert(doesClobberMemory(I));
+/// getStoredPointerOperand - Return the pointer that is being written to.
+static Value *getStoredPointerOperand(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getPointerOperand();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
- return MI->getArgOperand(0);
+ return MI->getDest();
IntrinsicInst *II = cast<IntrinsicInst>(I);
switch (II->getIntrinsicID()) {
default: assert(false && "Unexpected intrinsic!");
case Intrinsic::init_trampoline:
return II->getArgOperand(0);
- case Intrinsic::lifetime_end:
- return II->getArgOperand(1);
}
}
-/// getStoreSize - Return the length in bytes of the write by the clobbering
-/// instruction. If variable or unknown, returns -1.
-static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
- assert(doesClobberMemory(I));
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!TD) return -1u;
- return TD->getTypeStoreSize(SI->getOperand(0)->getType());
+static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) {
+ const TargetData *TD = AA.getTargetData();
+ if (TD == 0)
+ return AliasAnalysis::UnknownSize;
+
+ if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
+ // Get size information for the alloca
+ if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
+ return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
+ return AliasAnalysis::UnknownSize;
}
+
+ assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
+ const PointerType *PT = cast<PointerType>(V->getType());
+ return TD->getTypeAllocSize(PT->getElementType());
+}
- Value *Len;
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
- Len = MI->getLength();
- } else {
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default: assert(false && "Unexpected intrinsic!");
- case Intrinsic::init_trampoline:
- return -1u;
- case Intrinsic::lifetime_end:
- Len = II->getArgOperand(0);
- break;
+/// isObjectPointerWithTrustworthySize - Return true if the specified Value* is
+/// pointing to an object with a pointer size we can trust.
+static bool isObjectPointerWithTrustworthySize(const Value *V) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ return !AI->isArrayAllocation();
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return !GV->mayBeOverridden();
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+ return false;
+}
+
+/// isCompleteOverwrite - Return true if a store to the 'Later' location
+/// completely overwrites a store to the 'Earlier' location.
+static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
+ const AliasAnalysis::Location &Earlier,
+ AliasAnalysis &AA) {
+ const Value *P1 = Earlier.Ptr->stripPointerCasts();
+ const Value *P2 = Later.Ptr->stripPointerCasts();
+
+ // If the start pointers are the same, we just have to compare sizes to see if
+ // the later store was larger than the earlier store.
+ if (P1 == P2) {
+ // If we don't know the sizes of either access, then we can't do a
+ // comparison.
+ if (Later.Size == AliasAnalysis::UnknownSize ||
+ Earlier.Size == AliasAnalysis::UnknownSize) {
+ // If we have no TargetData information around, then the size of the store
+      // is inferable from the pointee type.  If the two pointee types are the
+      // same, then we know the later store completely overwrites the earlier.
+ if (AA.getTargetData() == 0)
+ return Later.Ptr->getType() == Earlier.Ptr->getType();
+ return false;
}
+
+ // Make sure that the Later size is >= the Earlier size.
+ if (Later.Size < Earlier.Size)
+ return false;
+ return true;
}
- if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len))
- if (!LenCI->isAllOnesValue())
- return LenCI->getZExtValue();
- return -1u;
+
+ // Otherwise, we have to have size information, and the later store has to be
+ // larger than the earlier one.
+ if (Later.Size == AliasAnalysis::UnknownSize ||
+ Earlier.Size == AliasAnalysis::UnknownSize ||
+ Later.Size <= Earlier.Size || AA.getTargetData() == 0)
+ return false;
+
+ // Check to see if the later store is to the entire object (either a global,
+ // an alloca, or a byval argument). If so, then it clearly overwrites any
+ // other store to the same object.
+ const TargetData &TD = *AA.getTargetData();
+
+ const Value *UO1 = GetUnderlyingObject(P1, &TD),
+ *UO2 = GetUnderlyingObject(P2, &TD);
+
+ // If we can't resolve the same pointers to the same object, then we can't
+ // analyze them at all.
+ if (UO1 != UO2)
+ return false;
+
+ // If the "Later" store is to a recognizable object, get its size.
+ if (isObjectPointerWithTrustworthySize(UO2)) {
+ uint64_t ObjectSize =
+ TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType());
+ if (ObjectSize == Later.Size)
+ return true;
+ }
+
+ // Okay, we have stores to two completely different pointers. Try to
+ // decompose the pointer into a "base + constant_offset" form. If the base
+ // pointers are equal, then we can reason about the two stores.
+ int64_t Off1 = 0, Off2 = 0;
+ const Value *BP1 = GetPointerBaseWithConstantOffset(P1, Off1, TD);
+ const Value *BP2 = GetPointerBaseWithConstantOffset(P2, Off2, TD);
+
+ // If the base pointers still differ, we have two completely different stores.
+ if (BP1 != BP2)
+ return false;
+
+ // Otherwise, we might have a situation like:
+ // store i16 -> P + 1 Byte
+ // store i32 -> P
+ // In this case, we see if the later store completely overlaps all bytes
+ // stored by the previous store.
+ if (Off1 < Off2 || // Earlier starts before Later.
+ Off1+Earlier.Size > Off2+Later.Size) // Earlier goes beyond Later.
+ return false;
+ // Otherwise, we have complete overlap.
+ return true;
}
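
The final test above is plain interval containment on byte ranges: the later store's [Off2, Off2+Size) must cover the earlier store's [Off1, Off1+Size). Isolated:

#include <cstdint>

// True iff [Off1, Off1+EarlierSize) lies inside [Off2, Off2+LaterSize).
// E.g. a 4-byte store at offset 0 covers a 2-byte store at offset 1:
// laterCoversEarlier(1, 2, 0, 4) == true.
bool laterCoversEarlier(int64_t Off1, uint64_t EarlierSize,
                        int64_t Off2, uint64_t LaterSize) {
  return Off2 <= Off1 &&
         Off1 + int64_t(EarlierSize) <= Off2 + int64_t(LaterSize);
}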
-/// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is
-/// greater than or equal to the store in I2. This returns false if we don't
-/// know.
+/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
+/// memory region into an identical pointer) then it doesn't actually make its
+/// input dead in the traditional sense. Consider this case:
+///
+/// memcpy(A <- B)
+/// memcpy(A <- A)
+///
+/// In this case, the second store to A does not make the first store to A dead.
+/// The usual situation isn't an explicit A<-A store like this (which can be
+/// trivially removed) but a case where two pointers may alias.
///
-static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2,
- const TargetData *TD) {
- const Type *I1Ty = getPointerOperand(I1)->getType();
- const Type *I2Ty = getPointerOperand(I2)->getType();
+/// This function detects when it is unsafe to remove a dependent instruction
+/// because the DSE-inducing instruction may be a self-read.
+static bool isPossibleSelfRead(Instruction *Inst,
+ const AliasAnalysis::Location &InstStoreLoc,
+ Instruction *DepWrite, AliasAnalysis &AA) {
+ // Self reads can only happen for instructions that read memory. Get the
+ // location read.
+ AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
+ if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction.
- // Exactly the same type, must have exactly the same size.
- if (I1Ty == I2Ty) return true;
+ // If the read and written loc obviously don't alias, it isn't a read.
+ if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
- int I1Size = getStoreSize(I1, TD);
- int I2Size = getStoreSize(I2, TD);
+  // Okay, 'Inst' may copy over itself. However, we can still remove the
+ // DepWrite instruction if we can prove that it reads from the same location
+ // as Inst. This handles useful cases like:
+ // memcpy(A <- B)
+ // memcpy(A <- B)
+ // Here we don't know if A/B may alias, but we do know that B/B are must
+  // aliases, so removing the first memcpy is safe (assuming it writes no more
+  // bytes than the second one does).
+ AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA);
- return I1Size != -1 && I2Size != -1 && I1Size >= I2Size;
+ if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
+ return false;
+
+ // If DepWrite doesn't read memory or if we can't prove it is a must alias,
+ // then it can't be considered dead.
+ return true;
}
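
The self-read hazard, made concrete in source form (memmove keeps the overlapping copy well-defined in C++; the IR-level reasoning is the same for a may-alias memcpy):

#include <cstdio>
#include <cstring>

int main() {
  char Buf[4] = "abc";
  char *A = Buf, *B = Buf;    // may-alias pointers; here they are equal
  std::memmove(A, "xyz", 4);  // looks dead: A is fully overwritten below...
  std::memmove(A, B, 4);      // ...but this *reads* A (through B) first
  // Prints "xyz"; deleting the "dead" first copy would print "abc".
  std::printf("%s\n", Buf);
}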
-bool DSE::runOnBasicBlock(BasicBlock &BB) {
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
- TD = getAnalysisIfAvailable<TargetData>();
+//===----------------------------------------------------------------------===//
+// DSE Pass
+//===----------------------------------------------------------------------===//
+
+bool DSE::runOnBasicBlock(BasicBlock &BB) {
bool MadeChange = false;
// Do a top-down walk on the BB.
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
Instruction *Inst = BBI++;
- // If we find a store or a free, get its memory dependence.
- if (!doesClobberMemory(Inst) && !isFreeCall(Inst))
- continue;
-
- MemDepResult InstDep = MD.getDependency(Inst);
-
- // Ignore non-local stores.
- // FIXME: cross-block DSE would be fun. :)
- if (InstDep.isNonLocal()) continue;
-
- // Handle frees whose dependencies are non-trivial.
- if (const CallInst *F = isFreeCall(Inst)) {
- MadeChange |= handleFreeWithNonTrivialDependency(F, InstDep);
+ // Handle 'free' calls specially.
+ if (CallInst *F = isFreeCall(Inst)) {
+ MadeChange |= HandleFree(F);
continue;
}
- // If not a definite must-alias dependency, ignore it.
- if (!InstDep.isDef())
+ // If we find something that writes memory, get its memory dependence.
+ if (!hasMemoryWrite(Inst))
continue;
-
- // If this is a store-store dependence, then the previous store is dead so
- // long as this store is at least as big as it.
- if (doesClobberMemory(InstDep.getInst())) {
- Instruction *DepStore = InstDep.getInst();
- if (isStoreAtLeastAsWideAs(Inst, DepStore, TD) &&
- isElidable(DepStore)) {
- // Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepStore);
- ++NumFastStores;
- MadeChange = true;
- // DeleteDeadInstruction can delete the current instruction in loop
- // cases, reset BBI.
- BBI = Inst;
- if (BBI != BB.begin())
- --BBI;
- continue;
- }
- }
+ MemDepResult InstDep = MD->getDependency(Inst);
- if (!isElidable(Inst))
+ // Ignore non-local store liveness.
+ // FIXME: cross-block DSE would be fun. :)
+ if (InstDep.isNonLocal() ||
+ // Ignore self dependence, which happens in the entry block of the
+ // function.
+ InstDep.getInst() == Inst)
continue;
-
+
// If we're storing the same value back to a pointer that we just
// loaded from, then the store can be removed.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- SI->getOperand(0) == DepLoad) {
+ SI->getOperand(0) == DepLoad && !SI->isVolatile()) {
+ DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
+ << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
+
// DeleteDeadInstruction can delete the current instruction. Save BBI
// in case we need it.
WeakVH NextInst(BBI);
- DeleteDeadInstruction(SI);
+ DeleteDeadInstruction(SI, *MD);
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
@@ -258,24 +460,63 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
}
}
- // If this is a lifetime end marker, we can throw away the store.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(InstDep.getInst())) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_end) {
- // Delete the store and now-dead instructions that feed it.
- // DeleteDeadInstruction can delete the current instruction. Save BBI
- // in case we need it.
- WeakVH NextInst(BBI);
-
- DeleteDeadInstruction(Inst);
+ // Figure out what location is being stored to.
+ AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
+
+ // If we didn't get a useful location, fail.
+ if (Loc.Ptr == 0)
+ continue;
+
+ while (!InstDep.isNonLocal()) {
+ // Get the memory clobbered by the instruction we depend on. MemDep will
+ // skip any instructions that 'Loc' clearly doesn't interact with. If we
+ // end up depending on a may- or must-aliased load, then we can't optimize
+    // away the store and we bail out. However, if we depend on something
+ // that overwrites the memory location we *can* potentially optimize it.
+ //
+    // Find out what memory location the dependent instruction stores to.
+ Instruction *DepWrite = InstDep.getInst();
+ AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
+      // If we didn't get a useful location, bail out.
+ if (DepLoc.Ptr == 0)
+ break;
+
+      // If we find a write that is a) removable (i.e., non-volatile), b)
+      // completely obliterated by the store to 'Loc', and c) known not to be
+      // read by 'Inst', then we can remove it.
+ if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&
+ !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
+ << *DepWrite << "\n KILLER: " << *Inst << '\n');
- if (NextInst == 0) // Next instruction deleted.
- BBI = BB.begin();
- else if (BBI != BB.begin()) // Revisit this instruction if possible.
- --BBI;
+ // Delete the store and now-dead instructions that feed it.
+ DeleteDeadInstruction(DepWrite, *MD);
++NumFastStores;
MadeChange = true;
- continue;
+
+ // DeleteDeadInstruction can delete the current instruction in loop
+ // cases, reset BBI.
+ BBI = Inst;
+ if (BBI != BB.begin())
+ --BBI;
+ break;
}
+
+ // If this is a may-aliased store that is clobbering the store value, we
+ // can keep searching past it for another must-aliased pointer that stores
+ // to the same location. For example, in:
+ // store -> P
+ // store -> Q
+ // store -> P
+ // we can remove the first store to P even though we don't know if P and Q
+ // alias.
+ if (DepWrite == &BB.front()) break;
+
+ // Can't look past this instruction if it might read 'Loc'.
+ if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
+ break;
+
+ InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
}
}
@@ -287,26 +528,36 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
return MadeChange;
}
-/// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose
-/// dependency is a store to a field of that structure.
-bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
- MemDepResult Dep) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
- Instruction *Dependency = Dep.getInst();
- if (!Dependency || !doesClobberMemory(Dependency) || !isElidable(Dependency))
- return false;
+/// HandleFree - Handle frees of entire structures whose dependency is a store
+/// to a field of that structure.
+bool DSE::HandleFree(CallInst *F) {
+ MemDepResult Dep = MD->getDependency(F);
+ do {
+ if (Dep.isNonLocal()) return false;
+
+ Instruction *Dependency = Dep.getInst();
+ if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
+ return false;
- Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();
+ Value *DepPointer =
+ GetUnderlyingObject(getStoredPointerOperand(Dependency));
- // Check for aliasing.
- if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) !=
- AliasAnalysis::MustAlias)
- return false;
+ // Check for aliasing.
+ if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
+ return false;
+
+ // DCE instructions only used to calculate that store
+ DeleteDeadInstruction(Dependency, *MD);
+ ++NumFastStores;
+
+    // F's old dependency is now deleted. Compute the next dependency,
+ // which may also be dead, as in
+ // s[0] = 0;
+ // s[1] = 0; // This has just been deleted.
+ // free(s);
+ Dep = MD->getDependency(F);
+ } while (!Dep.isNonLocal());
- // DCE instructions only used to calculate that store
- DeleteDeadInstruction(Dependency);
- ++NumFastStores;
return true;
}
@@ -317,259 +568,163 @@ bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
/// store i32 1, i32* %A
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
bool MadeChange = false;
- // Pointers alloca'd in this function are dead in the end block
- SmallPtrSet<Value*, 64> deadPointers;
+ // Keep track of all of the stack objects that are dead at the end of the
+ // function.
+ SmallPtrSet<Value*, 16> DeadStackObjects;
// Find all of the alloca'd pointers in the entry block.
BasicBlock *Entry = BB.getParent()->begin();
for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
- deadPointers.insert(AI);
+ DeadStackObjects.insert(AI);
// Treat byval arguments the same, stores to them are dead at the end of the
// function.
for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
AE = BB.getParent()->arg_end(); AI != AE; ++AI)
if (AI->hasByValAttr())
- deadPointers.insert(AI);
+ DeadStackObjects.insert(AI);
// Scan the basic block backwards
for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
--BBI;
- // If we find a store whose pointer is dead.
- if (doesClobberMemory(BBI)) {
- if (isElidable(BBI)) {
- // See through pointer-to-pointer bitcasts
- Value *pointerOperand = getPointerOperand(BBI)->getUnderlyingObject();
-
- // Alloca'd pointers or byval arguments (which are functionally like
- // alloca's) are valid candidates for removal.
- if (deadPointers.count(pointerOperand)) {
- // DCE instructions only used to calculate that store.
- Instruction *Dead = BBI;
- ++BBI;
- DeleteDeadInstruction(Dead, &deadPointers);
- ++NumFastStores;
- MadeChange = true;
- continue;
- }
- }
-
- // Because a memcpy or memmove is also a load, we can't skip it if we
- // didn't remove it.
- if (!isa<MemTransferInst>(BBI))
+ // If we find a store, check to see if it points into a dead stack value.
+ if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
+ // See through pointer-to-pointer bitcasts
+ Value *Pointer = GetUnderlyingObject(getStoredPointerOperand(BBI));
+
+ // Stores to stack values are valid candidates for removal.
+ if (DeadStackObjects.count(Pointer)) {
+ Instruction *Dead = BBI++;
+
+ DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
+ << *Dead << "\n Object: " << *Pointer << '\n');
+
+ // DCE instructions only used to calculate that store.
+ DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
+ ++NumFastStores;
+ MadeChange = true;
continue;
+ }
}
- Value *killPointer = 0;
- uint64_t killPointerSize = ~0UL;
+ // Remove any dead non-memory-mutating instructions.
+ if (isInstructionTriviallyDead(BBI)) {
+ Instruction *Inst = BBI++;
+ DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
+ ++NumFastOther;
+ MadeChange = true;
+ continue;
+ }
- // If we encounter a use of the pointer, it is no longer considered dead
- if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
- // However, if this load is unused and not volatile, we can go ahead and
- // remove it, and not have to worry about it making our pointer undead!
- if (L->use_empty() && !L->isVolatile()) {
- ++BBI;
- DeleteDeadInstruction(L, &deadPointers);
- ++NumFastOther;
- MadeChange = true;
- continue;
- }
-
- killPointer = L->getPointerOperand();
- } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
- killPointer = V->getOperand(0);
- } else if (isa<MemTransferInst>(BBI) &&
- isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) {
- killPointer = cast<MemTransferInst>(BBI)->getSource();
- killPointerSize = cast<ConstantInt>(
- cast<MemTransferInst>(BBI)->getLength())->getZExtValue();
- } else if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
- deadPointers.erase(A);
-
- // Dead alloca's can be DCE'd when we reach them
- if (A->use_empty()) {
- ++BBI;
- DeleteDeadInstruction(A, &deadPointers);
- ++NumFastOther;
- MadeChange = true;
- }
-
+ if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
+ DeadStackObjects.erase(A);
continue;
- } else if (CallSite CS = cast<Value>(BBI)) {
- // If this call does not access memory, it can't
- // be undeadifying any of our pointers.
- if (AA.doesNotAccessMemory(CS))
+ }
+
+ if (CallSite CS = cast<Value>(BBI)) {
+ // If this call does not access memory, it can't be loading any of our
+ // pointers.
+ if (AA->doesNotAccessMemory(CS))
continue;
- unsigned modRef = 0;
- unsigned other = 0;
+ unsigned NumModRef = 0, NumOther = 0;
- // Remove any pointers made undead by the call from the dead set
- std::vector<Value*> dead;
- for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
- E = deadPointers.end(); I != E; ++I) {
- // HACK: if we detect that our AA is imprecise, it's not
- // worth it to scan the rest of the deadPointers set. Just
- // assume that the AA will return ModRef for everything, and
- // go ahead and bail.
- if (modRef >= 16 && other == 0) {
- deadPointers.clear();
+ // If the call might load from any of our allocas, then any store above
+ // the call is live.
+ SmallVector<Value*, 8> LiveAllocas;
+ for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+ E = DeadStackObjects.end(); I != E; ++I) {
+ // If we detect that our AA is imprecise, it's not worth it to scan the
+        // rest of the DeadStackObjects set.  Just assume that the AA will
+        // return
+ // ModRef for everything, and go ahead and bail out.
+ if (NumModRef >= 16 && NumOther == 0)
return MadeChange;
- }
-
- // See if the call site touches it
- AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I,
- getPointerSize(*I));
+
+ // See if the call site touches it.
+ AliasAnalysis::ModRefResult A =
+ AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
if (A == AliasAnalysis::ModRef)
- ++modRef;
+ ++NumModRef;
else
- ++other;
+ ++NumOther;
if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
- dead.push_back(*I);
+ LiveAllocas.push_back(*I);
}
-
- for (std::vector<Value*>::iterator I = dead.begin(), E = dead.end();
- I != E; ++I)
- deadPointers.erase(*I);
- continue;
- } else if (isInstructionTriviallyDead(BBI)) {
- // For any non-memory-affecting non-terminators, DCE them as we reach them
- Instruction *Inst = BBI;
- ++BBI;
- DeleteDeadInstruction(Inst, &deadPointers);
- ++NumFastOther;
- MadeChange = true;
+ for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
+ E = LiveAllocas.end(); I != E; ++I)
+ DeadStackObjects.erase(*I);
+
+ // If all of the allocas were clobbered by the call then we're not going
+ // to find anything else to process.
+ if (DeadStackObjects.empty())
+ return MadeChange;
+
continue;
}
- if (!killPointer)
+ AliasAnalysis::Location LoadedLoc;
+
+ // If we encounter a use of the pointer, it is no longer considered dead
+ if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+ LoadedLoc = AA->getLocation(L);
+ } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
+ LoadedLoc = AA->getLocation(V);
+ } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
+ LoadedLoc = AA->getLocationForSource(MTI);
+ } else {
+ // Not a loading instruction.
continue;
+ }
- killPointer = killPointer->getUnderlyingObject();
+    // Remove any allocas from the DeadStackObjects set that are loaded, as this
+ // makes any stores above the access live.
+ RemoveAccessedObjects(LoadedLoc, DeadStackObjects);
- // Deal with undead pointers
- MadeChange |= RemoveUndeadPointers(killPointer, killPointerSize, BBI,
- deadPointers);
+ // If all of the allocas were clobbered by the access then we're not going
+ // to find anything else to process.
+ if (DeadStackObjects.empty())
+ break;
}
return MadeChange;
}
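
What the backward scan removes, shown at the source level: a store into a stack object with no later read before the function returns is unobservable, hence dead.

int endBlockExample(int X) {
  int Local;      // stack object: dead at every return
  Local = X * 2;  // dead store -- no path reads Local before returning
  return X + 1;
}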
-/// RemoveUndeadPointers - check for uses of a pointer that make it
-/// undead when scanning for dead stores to alloca's.
-bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize,
- BasicBlock::iterator &BBI,
- SmallPtrSet<Value*, 64> &deadPointers) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
- // If the kill pointer can be easily reduced to an alloca,
- // don't bother doing extraneous AA queries.
- if (deadPointers.count(killPointer)) {
- deadPointers.erase(killPointer);
- return false;
- }
-
- // A global can't be in the dead pointer set.
- if (isa<GlobalValue>(killPointer))
- return false;
-
- bool MadeChange = false;
+/// RemoveAccessedObjects - Check to see if the specified location may alias any
+/// of the stack objects in the DeadStackObjects set. If so, they become live
+/// because the location is being loaded.
+void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+ SmallPtrSet<Value*, 16> &DeadStackObjects) {
+ const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr);
+
+ // A constant can't be in the dead pointer set.
+ if (isa<Constant>(UnderlyingPointer))
+ return;
- SmallVector<Value*, 16> undead;
+  // If the underlying pointer is obviously an alloca or a byval argument,
+  // don't bother doing extraneous AA queries.
+ if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
+ DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer));
+ return;
+ }
- for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
- E = deadPointers.end(); I != E; ++I) {
- // See if this pointer could alias it
- AliasAnalysis::AliasResult A = AA.alias(*I, getPointerSize(*I),
- killPointer, killPointerSize);
-
- // If it must-alias and a store, we can delete it
- if (isa<StoreInst>(BBI) && A == AliasAnalysis::MustAlias) {
- StoreInst *S = cast<StoreInst>(BBI);
-
- // Remove it!
- ++BBI;
- DeleteDeadInstruction(S, &deadPointers);
- ++NumFastStores;
- MadeChange = true;
-
- continue;
-
- // Otherwise, it is undead
- } else if (A != AliasAnalysis::NoAlias)
- undead.push_back(*I);
+ SmallVector<Value*, 16> NowLive;
+ for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+ E = DeadStackObjects.end(); I != E; ++I) {
+ // See if the loaded location could alias the stack location.
+ AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA));
+ if (!AA->isNoAlias(StackLoc, LoadedLoc))
+ NowLive.push_back(*I);
}
- for (SmallVector<Value*, 16>::iterator I = undead.begin(), E = undead.end();
+ for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end();
I != E; ++I)
- deadPointers.erase(*I);
-
- return MadeChange;
+ DeadStackObjects.erase(*I);
}
-/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
-/// and zero out all the operands of this instruction. If any of them become
-/// dead, delete them and the computation tree that feeds them.
-///
-/// If ValueSet is non-null, remove any deleted instructions from it as well.
-///
-void DSE::DeleteDeadInstruction(Instruction *I,
- SmallPtrSet<Value*, 64> *ValueSet) {
- SmallVector<Instruction*, 32> NowDeadInsts;
-
- NowDeadInsts.push_back(I);
- --NumFastOther;
-
- // Before we touch this instruction, remove it from memdep!
- MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
- do {
- Instruction *DeadInst = NowDeadInsts.pop_back_val();
-
- ++NumFastOther;
-
- // This instruction is dead, zap it, in stages. Start by removing it from
- // MemDep, which needs to know the operands and needs it to be in the
- // function.
- MDA.removeInstruction(DeadInst);
-
- for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
- Value *Op = DeadInst->getOperand(op);
- DeadInst->setOperand(op, 0);
-
- // If this operand just became dead, add it to the NowDeadInsts list.
- if (!Op->use_empty()) continue;
-
- if (Instruction *OpI = dyn_cast<Instruction>(Op))
- if (isInstructionTriviallyDead(OpI))
- NowDeadInsts.push_back(OpI);
- }
-
- DeadInst->eraseFromParent();
-
- if (ValueSet) ValueSet->erase(DeadInst);
- } while (!NowDeadInsts.empty());
-}
-
-unsigned DSE::getPointerSize(Value *V) const {
- if (TD) {
- if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
- // Get size information for the alloca
- if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
- return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
- } else {
- assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
- const PointerType *PT = cast<PointerType>(V->getType());
- return TD->getTypeAllocSize(PT->getElementType());
- }
- }
- return ~0U;
-}
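
In rough terms, the rewritten helper asks one question per tracked object: can the loaded location alias it? A stand-alone C++ sketch of that behavior, with a toy alias oracle standing in for AliasAnalysis (names and types here are illustrative, not part of the patch):

    #include <set>
    #include <string>

    // Toy alias oracle standing in for AliasAnalysis::isNoAlias: here two
    // locations alias only when they are literally the same name.
    static bool mayAlias(const std::string &A, const std::string &B) {
      return A == B;
    }

    // Any tracked stack object the loaded location may alias is removed from
    // the dead set, since the read proves it live.
    static void removeAccessedObjects(const std::string &LoadedLoc,
                                      std::set<std::string> &DeadStackObjects) {
      for (std::set<std::string>::iterator I = DeadStackObjects.begin();
           I != DeadStackObjects.end();)
        if (mayAlias(*I, LoadedLoc))
          DeadStackObjects.erase(I++);
        else
          ++I;
    }

    int main() {
      std::set<std::string> Dead;
      Dead.insert("%a");
      Dead.insert("%b");
      removeAccessedObjects("%a", Dead);          // a load of %a resurrects it
      return static_cast<int>(Dead.count("%a"));  // 0: %a is live again
    }
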
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
new file mode 100644
index 0000000..3d3f17b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -0,0 +1,470 @@
+//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a simple dominator tree walk that eliminates trivially
+// redundant instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "early-cse"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
+STATISTIC(NumCSE, "Number of instructions CSE'd");
+STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
+STATISTIC(NumCSECall, "Number of call instructions CSE'd");
+STATISTIC(NumDSE, "Number of trivial dead stores removed");
+
+static unsigned getHash(const void *V) {
+ return DenseMapInfo<const void*>::getHashValue(V);
+}
+
+//===----------------------------------------------------------------------===//
+// SimpleValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// SimpleValue - Instances of this struct represent available values in the
+ /// scoped hash table.
+ struct SimpleValue {
+ Instruction *Inst;
+
+ SimpleValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+
+ static bool canHandle(Instruction *Inst) {
+ // This can only handle non-void readnone functions.
+ if (CallInst *CI = dyn_cast<CallInst>(Inst))
+ return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+ return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
+ isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+ isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
+ }
+ };
+}
+
+namespace llvm {
+// SimpleValue is POD.
+template<> struct isPodLike<SimpleValue> {
+ static const bool value = true;
+};
+
+template<> struct DenseMapInfo<SimpleValue> {
+ static inline SimpleValue getEmptyKey() {
+ return DenseMapInfo<Instruction*>::getEmptyKey();
+ }
+ static inline SimpleValue getTombstoneKey() {
+ return DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(SimpleValue Val);
+ static bool isEqual(SimpleValue LHS, SimpleValue RHS);
+};
+}
+
+unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+ Instruction *Inst = Val.Inst;
+
+ // Hash in all of the operands as pointers.
+ unsigned Res = 0;
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ Res ^= getHash(Inst->getOperand(i)) << i;
+
+ if (CastInst *CI = dyn_cast<CastInst>(Inst))
+ Res ^= getHash(CI->getType());
+ else if (CmpInst *CI = dyn_cast<CmpInst>(Inst))
+ Res ^= CI->getPredicate();
+ else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst)) {
+ for (ExtractValueInst::idx_iterator I = EVI->idx_begin(),
+ E = EVI->idx_end(); I != E; ++I)
+ Res ^= *I;
+ } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst)) {
+ for (InsertValueInst::idx_iterator I = IVI->idx_begin(),
+ E = IVI->idx_end(); I != E; ++I)
+ Res ^= *I;
+ } else {
+ // nothing extra to hash in.
+ assert((isa<CallInst>(Inst) ||
+ isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst)) &&
+ "Invalid/unknown instruction");
+ }
+
+ // Mix in the opcode.
+ return (Res << 1) ^ Inst->getOpcode();
+}
+
+bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+ Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+
+ if (LHS.isSentinel() || RHS.isSentinel())
+ return LHSI == RHSI;
+
+ if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
+ return LHSI->isIdenticalTo(RHSI);
+}
+
+//===----------------------------------------------------------------------===//
+// CallValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// CallValue - Instances of this struct represent available call values in
+ /// the scoped hash table.
+ struct CallValue {
+ Instruction *Inst;
+
+ CallValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+
+ static bool canHandle(Instruction *Inst) {
+ // Don't value number anything that returns void.
+ if (Inst->getType()->isVoidTy())
+ return false;
+
+ CallInst *CI = dyn_cast<CallInst>(Inst);
+ if (CI == 0 || !CI->onlyReadsMemory())
+ return false;
+ return true;
+ }
+ };
+}
+
+namespace llvm {
+ // CallValue is POD.
+ template<> struct isPodLike<CallValue> {
+ static const bool value = true;
+ };
+
+ template<> struct DenseMapInfo<CallValue> {
+ static inline CallValue getEmptyKey() {
+ return DenseMapInfo<Instruction*>::getEmptyKey();
+ }
+ static inline CallValue getTombstoneKey() {
+ return DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(CallValue Val);
+ static bool isEqual(CallValue LHS, CallValue RHS);
+ };
+}
+unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
+ Instruction *Inst = Val.Inst;
+ // Hash in all of the operands as pointers.
+ unsigned Res = 0;
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
+ assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
+ "Cannot value number calls with metadata operands");
+ Res ^= getHash(Inst->getOperand(i)) << i;
+ }
+
+ // Mix in the opcode.
+ return (Res << 1) ^ Inst->getOpcode();
+}
+
+bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
+ Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+ if (LHS.isSentinel() || RHS.isSentinel())
+ return LHSI == RHSI;
+ return LHSI->isIdenticalTo(RHSI);
+}
+
+
+//===----------------------------------------------------------------------===//
+// EarlyCSE pass.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// EarlyCSE - This pass does a simple depth-first walk over the dominator
+/// tree, eliminating trivially redundant instructions and using instsimplify
+/// to canonicalize things as it goes. It is intended to be fast and catch
+/// obvious cases so that instcombine and other passes are more effective. It
+/// is expected that a later pass of GVN will catch the interesting/hard
+/// cases.
+class EarlyCSE : public FunctionPass {
+public:
+ const TargetData *TD;
+ DominatorTree *DT;
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
+ typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
+ AllocatorTy> ScopedHTType;
+
+ /// AvailableValues - This scoped hash table contains the current values of
+ /// all of our simple scalar expressions. As we walk down the domtree, we
+ /// look to see if instructions are in this: if so, we replace them with what
+ /// we find, otherwise we insert them so that dominated values can succeed in
+ /// their lookup.
+ ScopedHTType *AvailableValues;
+
+ /// AvailableLoads - This scoped hash table contains the current values
+ /// of loads. This allows us to get efficient access to dominating loads when
+ /// we have a fully redundant load. In addition to the most recent load, we
+ /// keep track of a generation count of the read, which is compared against
+ /// the current generation count. The current generation count is
+ /// incremented after every possibly writing memory operation, which ensures
+ /// that we only CSE loads with other loads that have no intervening store.
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator;
+ typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>,
+ DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType;
+ LoadHTType *AvailableLoads;
+
+ /// AvailableCalls - This scoped hash table contains the currently available
+ /// values of read-only calls. It uses the same generation count as loads.
+ typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType;
+ CallHTType *AvailableCalls;
+
+ /// CurrentGeneration - This is the current generation of the memory value.
+ unsigned CurrentGeneration;
+
+ static char ID;
+ explicit EarlyCSE() : FunctionPass(ID) {
+ initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+private:
+
+ bool processNode(DomTreeNode *Node);
+
+ // This transformation requires dominator tree info
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char EarlyCSE::ID = 0;
+
+// createEarlyCSEPass - The public interface to this file.
+FunctionPass *llvm::createEarlyCSEPass() {
+ return new EarlyCSE();
+}
+
+INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
+
+bool EarlyCSE::processNode(DomTreeNode *Node) {
+ // Define a scope in the scoped hash table. When we are done processing this
+ // domtree node and recurse back up to our parent domtree node, this will pop
+ // off all the values we install.
+ ScopedHTType::ScopeTy Scope(*AvailableValues);
+
+ // Define a scope for the load values so that anything we add will get
+ // popped when we recurse back up to our parent domtree node.
+ LoadHTType::ScopeTy LoadScope(*AvailableLoads);
+
+ // Define a scope for the call values so that anything we add will get
+ // popped when we recurse back up to our parent domtree node.
+ CallHTType::ScopeTy CallScope(*AvailableCalls);
+
+ BasicBlock *BB = Node->getBlock();
+
+ // If this block has a single predecessor, then the predecessor is the parent
+ // of the domtree node and all of the live-out memory values are still current
+ // in this block. If this block has multiple predecessors, then they could
+ // have invalidated the live-out memory values of our parent value. For now,
+ // just be conservative and invalidate memory if this block has multiple
+ // predecessors.
+ if (BB->getSinglePredecessor() == 0)
+ ++CurrentGeneration;
+
+ /// LastStore - Keep track of the last non-volatile store that we saw... for
+ /// as long as there is no instruction that reads memory. If we see a store
+ /// to the same location, we delete the dead store. This zaps trivial dead
+ /// stores which can occur in bitfield code among other things.
+ StoreInst *LastStore = 0;
+
+ bool Changed = false;
+
+ // See if any instructions in the block can be eliminated. If so, do it. If
+ // not, add them to AvailableValues.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+
+ // Dead instructions should just be removed.
+ if (isInstructionTriviallyDead(Inst)) {
+ DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumSimplify;
+ continue;
+ }
+
+ // If the instruction can be simplified (e.g. X+0 = X) then replace it with
+ // its simpler value.
+ if (Value *V = SimplifyInstruction(Inst, TD, DT)) {
+ DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
+ Inst->replaceAllUsesWith(V);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumSimplify;
+ continue;
+ }
+
+ // If this is a simple instruction that we can value number, process it.
+ if (SimpleValue::canHandle(Inst)) {
+ // See if the instruction has an available value. If so, use it.
+ if (Value *V = AvailableValues->lookup(Inst)) {
+ DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n');
+ Inst->replaceAllUsesWith(V);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSE;
+ continue;
+ }
+
+ // Otherwise, just remember that this value is available.
+ AvailableValues->insert(Inst, Inst);
+ continue;
+ }
+
+ // If this is a non-volatile load, process it.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ // Ignore volatile loads.
+ if (LI->isVolatile()) {
+ LastStore = 0;
+ continue;
+ }
+
+ // If we have an available version of this load, and if it is the right
+ // generation, replace this instruction.
+ std::pair<Value*, unsigned> InVal =
+ AvailableLoads->lookup(Inst->getOperand(0));
+ if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: "
+ << *InVal.first << '\n');
+ if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSELoad;
+ continue;
+ }
+
+ // Otherwise, remember that we have this instruction.
+ AvailableLoads->insert(Inst->getOperand(0),
+ std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ LastStore = 0;
+ continue;
+ }
+
+ // If this instruction may read from memory, forget LastStore.
+ if (Inst->mayReadFromMemory())
+ LastStore = 0;
+
+ // If this is a read-only call, process it.
+ if (CallValue::canHandle(Inst)) {
+ // If we have an available version of this call, and if it is the right
+ // generation, replace this instruction.
+ std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
+ if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: "
+ << *InVal.first << '\n');
+ if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSECall;
+ continue;
+ }
+
+ // Otherwise, remember that we have this instruction.
+ AvailableCalls->insert(Inst,
+ std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ continue;
+ }
+
+ // Okay, this isn't something we can CSE at all. Check to see if it is
+ // something that could modify memory. If so, our available memory values
+ // cannot be used so bump the generation count.
+ if (Inst->mayWriteToMemory()) {
+ ++CurrentGeneration;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // We do a trivial form of DSE if there are two stores to the same
+ // location with no intervening loads. Delete the earlier store.
+ if (LastStore &&
+ LastStore->getPointerOperand() == SI->getPointerOperand()) {
+ DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: "
+ << *Inst << '\n');
+ LastStore->eraseFromParent();
+ Changed = true;
+ ++NumDSE;
+ LastStore = 0;
+ continue;
+ }
+
+ // Okay, we just invalidated anything we knew about loaded values. Try
+ // to salvage *something* by remembering that the stored value is a live
+ // version of the pointer. It is safe to forward from volatile stores
+ // to non-volatile loads, so we don't have to check for volatility of
+ // the store.
+ AvailableLoads->insert(SI->getPointerOperand(),
+ std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
+
+ // Remember that this was the last store we saw for DSE.
+ if (!SI->isVolatile())
+ LastStore = SI;
+ }
+ }
+ }
+
+ unsigned LiveOutGeneration = CurrentGeneration;
+ for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) {
+ Changed |= processNode(*I);
+ // Pop any generation changes off the stack from the recursive walk.
+ CurrentGeneration = LiveOutGeneration;
+ }
+ return Changed;
+}
+
+
+bool EarlyCSE::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<TargetData>();
+ DT = &getAnalysis<DominatorTree>();
+
+ // Tables that the pass uses when walking the domtree.
+ ScopedHTType AVTable;
+ AvailableValues = &AVTable;
+ LoadHTType LoadTable;
+ AvailableLoads = &LoadTable;
+ CallHTType CallTable;
+ AvailableCalls = &CallTable;
+
+ CurrentGeneration = 0;
+ return processNode(DT->getRootNode());
+}
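
The core of the new pass's load handling is the generation counter described above: a cached load may only be reused while no possibly-writing instruction has bumped the counter since the entry was recorded. A minimal stand-alone C++ model of that bookkeeping (illustrative only; the real pass additionally scopes the table to the dominator tree walk, so entries vanish when the walk leaves the block that created them):

    #include <cstdio>
    #include <map>
    #include <string>
    #include <utility>

    int main() {
      unsigned CurrentGeneration = 0;
      // pointer -> (value produced by the last load, generation at that time)
      std::map<std::string, std::pair<int, unsigned> > AvailableLoads;

      AvailableLoads["%p"] = std::make_pair(42, CurrentGeneration); // %v1 = load %p

      std::pair<int, unsigned> In = AvailableLoads["%p"];           // %v2 = load %p
      if (In.second == CurrentGeneration)
        std::printf("second load CSE'd to %d\n", In.first);

      ++CurrentGeneration;                // a store or other write was seen

      In = AvailableLoads["%p"];                                    // %v3 = load %p
      if (In.second != CurrentGeneration)
        std::printf("stale generation: the third load must stay\n");
      return 0;
    }
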
diff --git a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp b/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp
index 53dd06d..4c3d188 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp
@@ -27,13 +27,15 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
public:
static char ID; // Pass identification, replacement for typeid
- explicit GEPSplitter() : FunctionPass(ID) {}
+ explicit GEPSplitter() : FunctionPass(ID) {
+ initializeGEPSplitterPass(*PassRegistry::getPassRegistry());
+ }
};
}
char GEPSplitter::ID = 0;
INITIALIZE_PASS(GEPSplitter, "split-geps",
- "split complex GEPs into simple GEPs", false, false);
+ "split complex GEPs into simple GEPs", false, false)
FunctionPass *llvm::createGEPSplitterPass() {
return new GEPSplitter();
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index c62ce1f..a0123f5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -17,39 +17,30 @@
#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
-#include "llvm/Operator.h"
-#include "llvm/Value.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
using namespace llvm;
STATISTIC(NumGVNInstr, "Number of instructions deleted");
@@ -61,7 +52,6 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd");
static cl::opt<bool> EnablePRE("enable-pre",
cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
-static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false));
//===----------------------------------------------------------------------===//
// ValueTable Class
@@ -72,76 +62,23 @@ static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false));
/// two values.
namespace {
struct Expression {
- enum ExpressionOpcode {
- ADD = Instruction::Add,
- FADD = Instruction::FAdd,
- SUB = Instruction::Sub,
- FSUB = Instruction::FSub,
- MUL = Instruction::Mul,
- FMUL = Instruction::FMul,
- UDIV = Instruction::UDiv,
- SDIV = Instruction::SDiv,
- FDIV = Instruction::FDiv,
- UREM = Instruction::URem,
- SREM = Instruction::SRem,
- FREM = Instruction::FRem,
- SHL = Instruction::Shl,
- LSHR = Instruction::LShr,
- ASHR = Instruction::AShr,
- AND = Instruction::And,
- OR = Instruction::Or,
- XOR = Instruction::Xor,
- TRUNC = Instruction::Trunc,
- ZEXT = Instruction::ZExt,
- SEXT = Instruction::SExt,
- FPTOUI = Instruction::FPToUI,
- FPTOSI = Instruction::FPToSI,
- UITOFP = Instruction::UIToFP,
- SITOFP = Instruction::SIToFP,
- FPTRUNC = Instruction::FPTrunc,
- FPEXT = Instruction::FPExt,
- PTRTOINT = Instruction::PtrToInt,
- INTTOPTR = Instruction::IntToPtr,
- BITCAST = Instruction::BitCast,
- ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
- ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
- FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
- FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
- FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
- SHUFFLE, SELECT, GEP, CALL, CONSTANT,
- INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
-
- ExpressionOpcode opcode;
+ uint32_t opcode;
const Type* type;
SmallVector<uint32_t, 4> varargs;
- Value *function;
Expression() { }
- Expression(ExpressionOpcode o) : opcode(o) { }
+ Expression(uint32_t o) : opcode(o) { }
bool operator==(const Expression &other) const {
if (opcode != other.opcode)
return false;
- else if (opcode == EMPTY || opcode == TOMBSTONE)
+ else if (opcode == ~0U || opcode == ~1U)
return true;
else if (type != other.type)
return false;
- else if (function != other.function)
+ else if (varargs != other.varargs)
return false;
- else {
- if (varargs.size() != other.varargs.size())
- return false;
-
- for (size_t i = 0; i < varargs.size(); ++i)
- if (varargs[i] != other.varargs[i])
- return false;
-
- return true;
- }
- }
-
- bool operator!=(const Expression &other) const {
- return !(*this == other);
+ return true;
}
};
@@ -155,19 +92,7 @@ namespace {
uint32_t nextValueNumber;
- Expression::ExpressionOpcode getOpcode(CmpInst* C);
- Expression create_expression(BinaryOperator* BO);
- Expression create_expression(CmpInst* C);
- Expression create_expression(ShuffleVectorInst* V);
- Expression create_expression(ExtractElementInst* C);
- Expression create_expression(InsertElementInst* V);
- Expression create_expression(SelectInst* V);
- Expression create_expression(CastInst* C);
- Expression create_expression(GetElementPtrInst* G);
- Expression create_expression(CallInst* C);
- Expression create_expression(ExtractValueInst* C);
- Expression create_expression(InsertValueInst* C);
-
+ Expression create_expression(Instruction* I);
uint32_t lookup_or_add_call(CallInst* C);
public:
ValueTable() : nextValueNumber(1) { }
@@ -176,7 +101,6 @@ namespace {
void add(Value *V, uint32_t num);
void clear();
void erase(Value *v);
- unsigned size();
void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
AliasAnalysis *getAliasAnalysis() const { return AA; }
void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
@@ -189,11 +113,11 @@ namespace {
namespace llvm {
template <> struct DenseMapInfo<Expression> {
static inline Expression getEmptyKey() {
- return Expression(Expression::EMPTY);
+ return ~0U;
}
static inline Expression getTombstoneKey() {
- return Expression(Expression::TOMBSTONE);
+ return ~1U;
}
static unsigned getHashValue(const Expression e) {
@@ -205,20 +129,13 @@ template <> struct DenseMapInfo<Expression> {
for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
E = e.varargs.end(); I != E; ++I)
hash = *I + hash * 37;
-
- hash = ((unsigned)((uintptr_t)e.function >> 4) ^
- (unsigned)((uintptr_t)e.function >> 9)) +
- hash * 37;
-
+
return hash;
}
static bool isEqual(const Expression &LHS, const Expression &RHS) {
return LHS == RHS;
}
};
-
-template <>
-struct isPodLike<Expression> { static const bool value = true; };
}
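
The visible loop in getHashValue is an ordinary multiplicative combine over the operand value numbers; a stand-alone sketch of just that step (the surrounding function, only partly visible in this hunk, also folds in other fields):

    #include <stdint.h>
    #include <vector>

    static uint32_t foldVarargs(uint32_t hash,
                                const std::vector<uint32_t> &varargs) {
      for (size_t i = 0, e = varargs.size(); i != e; ++i)
        hash = varargs[i] + hash * 37;   // same combine step as the loop above
      return hash;
    }

    int main() {
      std::vector<uint32_t> ops(3, 1);
      return foldVarargs(0, ops) ? 0 : 1;
    }
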
@@ -226,185 +143,27 @@ struct isPodLike<Expression> { static const bool value = true; };
// ValueTable Internal Functions
//===----------------------------------------------------------------------===//
-Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
- if (isa<ICmpInst>(C)) {
- switch (C->getPredicate()) {
- default: // THIS SHOULD NEVER HAPPEN
- llvm_unreachable("Comparison with unknown predicate?");
- case ICmpInst::ICMP_EQ: return Expression::ICMPEQ;
- case ICmpInst::ICMP_NE: return Expression::ICMPNE;
- case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
- case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
- case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
- case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
- case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
- case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
- case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
- case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
- }
- } else {
- switch (C->getPredicate()) {
- default: // THIS SHOULD NEVER HAPPEN
- llvm_unreachable("Comparison with unknown predicate?");
- case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
- case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
- case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
- case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
- case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
- case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
- case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
- case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
- case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
- case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
- case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
- case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
- case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
- case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
- }
- }
-}
-
-Expression ValueTable::create_expression(CallInst* C) {
- Expression e;
-
- e.type = C->getType();
- e.function = C->getCalledFunction();
- e.opcode = Expression::CALL;
-
- CallSite CS(C);
- for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I)
- e.varargs.push_back(lookup_or_add(*I));
-
- return e;
-}
-
-Expression ValueTable::create_expression(BinaryOperator* BO) {
- Expression e;
- e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
- e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
- e.function = 0;
- e.type = BO->getType();
- e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode());
-
- return e;
-}
-
-Expression ValueTable::create_expression(CmpInst* C) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(C->getOperand(0)));
- e.varargs.push_back(lookup_or_add(C->getOperand(1)));
- e.function = 0;
- e.type = C->getType();
- e.opcode = getOpcode(C);
-
- return e;
-}
-
-Expression ValueTable::create_expression(CastInst* C) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(C->getOperand(0)));
- e.function = 0;
- e.type = C->getType();
- e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode());
-
- return e;
-}
-
-Expression ValueTable::create_expression(ShuffleVectorInst* S) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(S->getOperand(0)));
- e.varargs.push_back(lookup_or_add(S->getOperand(1)));
- e.varargs.push_back(lookup_or_add(S->getOperand(2)));
- e.function = 0;
- e.type = S->getType();
- e.opcode = Expression::SHUFFLE;
-
- return e;
-}
-Expression ValueTable::create_expression(ExtractElementInst* E) {
+Expression ValueTable::create_expression(Instruction *I) {
Expression e;
-
- e.varargs.push_back(lookup_or_add(E->getOperand(0)));
- e.varargs.push_back(lookup_or_add(E->getOperand(1)));
- e.function = 0;
- e.type = E->getType();
- e.opcode = Expression::EXTRACT;
-
- return e;
-}
-
-Expression ValueTable::create_expression(InsertElementInst* I) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(I->getOperand(0)));
- e.varargs.push_back(lookup_or_add(I->getOperand(1)));
- e.varargs.push_back(lookup_or_add(I->getOperand(2)));
- e.function = 0;
e.type = I->getType();
- e.opcode = Expression::INSERT;
-
- return e;
-}
-
-Expression ValueTable::create_expression(SelectInst* I) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(I->getCondition()));
- e.varargs.push_back(lookup_or_add(I->getTrueValue()));
- e.varargs.push_back(lookup_or_add(I->getFalseValue()));
- e.function = 0;
- e.type = I->getType();
- e.opcode = Expression::SELECT;
-
- return e;
-}
-
-Expression ValueTable::create_expression(GetElementPtrInst* G) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
- e.function = 0;
- e.type = G->getType();
- e.opcode = Expression::GEP;
-
- for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
- I != E; ++I)
- e.varargs.push_back(lookup_or_add(*I));
-
- return e;
-}
-
-Expression ValueTable::create_expression(ExtractValueInst* E) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
- for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
- II != IE; ++II)
- e.varargs.push_back(*II);
- e.function = 0;
- e.type = E->getType();
- e.opcode = Expression::EXTRACTVALUE;
-
- return e;
-}
-
-Expression ValueTable::create_expression(InsertValueInst* E) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
- e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
- for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
- II != IE; ++II)
- e.varargs.push_back(*II);
- e.function = 0;
- e.type = E->getType();
- e.opcode = Expression::INSERTVALUE;
-
+ e.opcode = I->getOpcode();
+ for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ e.varargs.push_back(lookup_or_add(*OI));
+
+ if (CmpInst *C = dyn_cast<CmpInst>(I))
+ e.opcode = (C->getOpcode() << 8) | C->getPredicate();
+ else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) {
+ for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
+ for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ }
+
return e;
}
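
The (opcode << 8) | predicate packing for compares is what keeps, say, icmp eq and icmp ne distinct now that all compares reach create_expression through the same instruction opcode. A toy illustration (the numeric values are hypothetical, not LLVM's actual enum constants):

    #include <cassert>
    #include <stdint.h>

    int main() {
      const uint32_t ICmpOpc = 53, ICMP_EQ = 32, ICMP_NE = 33; // hypothetical

      uint32_t EqKey = (ICmpOpc << 8) | ICMP_EQ;
      uint32_t NeKey = (ICmpOpc << 8) | ICMP_NE;

      // Without the packing both compares would present the same opcode to
      // the value table; with it they get distinct expression keys.
      assert(EqKey != NeKey);
      return 0;
    }
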
@@ -563,12 +322,8 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
case Instruction::And:
case Instruction::Or :
case Instruction::Xor:
- exp = create_expression(cast<BinaryOperator>(I));
- break;
case Instruction::ICmp:
case Instruction::FCmp:
- exp = create_expression(cast<CmpInst>(I));
- break;
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -581,28 +336,14 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::BitCast:
- exp = create_expression(cast<CastInst>(I));
- break;
case Instruction::Select:
- exp = create_expression(cast<SelectInst>(I));
- break;
case Instruction::ExtractElement:
- exp = create_expression(cast<ExtractElementInst>(I));
- break;
case Instruction::InsertElement:
- exp = create_expression(cast<InsertElementInst>(I));
- break;
case Instruction::ShuffleVector:
- exp = create_expression(cast<ShuffleVectorInst>(I));
- break;
case Instruction::ExtractValue:
- exp = create_expression(cast<ExtractValueInst>(I));
- break;
case Instruction::InsertValue:
- exp = create_expression(cast<InsertValueInst>(I));
- break;
case Instruction::GetElementPtr:
- exp = create_expression(cast<GetElementPtrInst>(I));
+ exp = create_expression(I);
break;
default:
valueNumbering[V] = nextValueNumber;
@@ -649,30 +390,76 @@ void ValueTable::verifyRemoved(const Value *V) const {
//===----------------------------------------------------------------------===//
namespace {
- struct ValueNumberScope {
- ValueNumberScope* parent;
- DenseMap<uint32_t, Value*> table;
-
- ValueNumberScope(ValueNumberScope* p) : parent(p) { }
- };
-}
-
-namespace {
class GVN : public FunctionPass {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
- : FunctionPass(ID), NoLoads(noloads), MD(0) { }
+ : FunctionPass(ID), NoLoads(noloads), MD(0) {
+ initializeGVNPass(*PassRegistry::getPassRegistry());
+ }
private:
bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
+ const TargetData* TD;
ValueTable VN;
- DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
+
+ /// LeaderTable - A mapping from value numbers to lists of Value*'s that
+ /// have that value number. Use findLeader to query it.
+ struct LeaderTableEntry {
+ Value *Val;
+ BasicBlock *BB;
+ LeaderTableEntry *Next;
+ };
+ DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
+ BumpPtrAllocator TableAllocator;
+
+ /// addToLeaderTable - Push a new Value onto the LeaderTable list for its
+ /// value number.
+ void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ LeaderTableEntry& Curr = LeaderTable[N];
+ if (!Curr.Val) {
+ Curr.Val = V;
+ Curr.BB = BB;
+ return;
+ }
+
+ LeaderTableEntry* Node = TableAllocator.Allocate<LeaderTableEntry>();
+ Node->Val = V;
+ Node->BB = BB;
+ Node->Next = Curr.Next;
+ Curr.Next = Node;
+ }
+
+ /// removeFromLeaderTable - Scan the list of values corresponding to a given
+ /// value number, and remove the given value if encountered.
+ void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ LeaderTableEntry* Prev = 0;
+ LeaderTableEntry* Curr = &LeaderTable[N];
+
+ while (Curr->Val != V || Curr->BB != BB) {
+ Prev = Curr;
+ Curr = Curr->Next;
+ }
+
+ if (Prev) {
+ Prev->Next = Curr->Next;
+ } else {
+ if (!Curr->Next) {
+ Curr->Val = 0;
+ Curr->BB = 0;
+ } else {
+ LeaderTableEntry* Next = Curr->Next;
+ Curr->Val = Next->Val;
+ Curr->BB = Next->BB;
+ Curr->Next = Next->Next;
+ }
+ }
+ }
// List of critical edges to be split between iterations.
SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
@@ -699,9 +486,8 @@ namespace {
bool processBlock(BasicBlock *BB);
void dump(DenseMap<uint32_t, Value*>& d);
bool iterateOnFunction(Function &F);
- Value *CollapsePhi(PHINode* p);
bool performPRE(Function& F);
- Value *lookupNumber(BasicBlock *BB, uint32_t num);
+ Value *findLeader(BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
@@ -715,7 +501,11 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
return new GVN(NoLoads);
}
-INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false);
+INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "{\n";
@@ -727,33 +517,6 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "}\n";
}
-static bool isSafeReplacement(PHINode* p, Instruction *inst) {
- if (!isa<PHINode>(inst))
- return true;
-
- for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
- UI != E; ++UI)
- if (PHINode* use_phi = dyn_cast<PHINode>(*UI))
- if (use_phi->getParent() == inst->getParent())
- return false;
-
- return true;
-}
-
-Value *GVN::CollapsePhi(PHINode *PN) {
- Value *ConstVal = PN->hasConstantValue(DT);
- if (!ConstVal) return 0;
-
- Instruction *Inst = dyn_cast<Instruction>(ConstVal);
- if (!Inst)
- return ConstVal;
-
- if (DT->dominates(Inst, PN))
- if (isSafeReplacement(PN, Inst))
- return Inst;
- return 0;
-}
-
/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
@@ -937,47 +700,6 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
-/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
-/// be expressed as a base pointer plus a constant offset. Return the base and
-/// offset to the caller.
-static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const TargetData &TD) {
- Operator *PtrOp = dyn_cast<Operator>(Ptr);
- if (PtrOp == 0) return Ptr;
-
- // Just look through bitcasts.
- if (PtrOp->getOpcode() == Instruction::BitCast)
- return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
-
- // If this is a GEP with constant indices, we can look through it.
- GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
- if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
- ++I, ++GTI) {
- ConstantInt *OpC = cast<ConstantInt>(*I);
- if (OpC->isZero()) continue;
-
- // Handle a struct and array indices which add their offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
- Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
- } else {
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
- Offset += OpC->getSExtValue()*Size;
- }
- }
-
- // Re-sign extend from the pointer size if needed to get overflow edge cases
- // right.
- unsigned PtrSize = TD.getPointerSizeInBits();
- if (PtrSize < 64)
- Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
-
- return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
-}
-
-
/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove). This means that the write *may* provide bits used
@@ -996,9 +718,8 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
- Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
- Value *LoadBase =
- GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
+ Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
if (StoreBase != LoadBase)
return -1;
@@ -1020,8 +741,6 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
// If the load and store don't overlap at all, the store doesn't provide
// anything to the load. In this case, they really don't alias at all, AA
// must have gotten confused.
- // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
- // remove this check, as it is duplicated with what we have below.
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
if ((WriteSizeInBits & 7) | (LoadSize & 7))
@@ -1067,12 +786,12 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI,
const TargetData &TD) {
// Cannot handle reading from store of first-class aggregate yet.
- if (DepSI->getOperand(0)->getType()->isStructTy() ||
- DepSI->getOperand(0)->getType()->isArrayTy())
+ if (DepSI->getValueOperand()->getType()->isStructTy() ||
+ DepSI->getValueOperand()->getType()->isArrayTy())
return -1;
Value *StorePtr = DepSI->getPointerOperand();
- uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
+ uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
StorePtr, StoreSize, TD);
}
@@ -1099,7 +818,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
Constant *Src = dyn_cast<Constant>(MTI->getSource());
if (Src == 0) return -1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
if (GV == 0 || !GV->isConstant()) return -1;
// See if the access is within the bounds of the transfer.
@@ -1331,6 +1050,15 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
if (V->getType()->isPointerTy())
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
AA->copyValue(LI, NewPHIs[i]);
+
+ // Now that we've copied information to the new PHIs, scan through
+ // them again and inform alias analysis that we've added potentially
+ // escaping uses to any values that are operands to these PHIs.
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
+ PHINode *P = NewPHIs[i];
+ for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
+ AA->addEscapingUse(P->getOperandUse(2*ii));
+ }
return V;
}
@@ -1347,8 +1075,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
SmallVectorImpl<Instruction*> &toErase) {
// Find the non-local dependencies of the load.
SmallVector<NonLocalDepResult, 64> Deps;
- MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
- Deps);
+ AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+ MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
//DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
// << Deps.size() << *LI << '\n');
@@ -1376,8 +1104,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
SmallVector<BasicBlock*, 16> UnavailableBlocks;
- const TargetData *TD = 0;
-
for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
@@ -1392,14 +1118,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// read by the load, we can extract the bits we need for the load from the
// stored value.
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
- if (TD == 0)
- TD = getAnalysisIfAvailable<TargetData>();
if (TD && Address) {
int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
DepSI, *TD);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
- DepSI->getOperand(0),
+ DepSI->getValueOperand(),
Offset));
continue;
}
@@ -1409,8 +1133,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
- if (TD == 0)
- TD = getAnalysisIfAvailable<TargetData>();
if (TD && Address) {
int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
DepMI, *TD);
@@ -1440,13 +1162,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
// different types if we have to.
- if (S->getOperand(0)->getType() != LI->getType()) {
- if (TD == 0)
- TD = getAnalysisIfAvailable<TargetData>();
-
+ if (S->getValueOperand()->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0),
+ if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
LI->getType(), *TD)) {
UnavailableBlocks.push_back(DepBB);
continue;
@@ -1454,16 +1173,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
}
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
- S->getOperand(0)));
+ S->getValueOperand()));
continue;
}
if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
// If the types mismatch and we can't handle it, reject reuse of the load.
if (LD->getType() != LI->getType()) {
- if (TD == 0)
- TD = getAnalysisIfAvailable<TargetData>();
-
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
@@ -1533,26 +1249,19 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
return false;
if (Blockers.count(TmpBB))
return false;
+
+ // If any of these blocks has more than one successor (i.e. if the edge we
+ // just traversed was critical), then there are other paths through this
+ // block along which the load may not be anticipated. Hoisting the load
+ // above this block would be adding the load to execution paths along
+ // which it was not previously executed.
if (TmpBB->getTerminator()->getNumSuccessors() != 1)
- allSingleSucc = false;
+ return false;
}
assert(TmpBB);
LoadBB = TmpBB;
- // If we have a repl set with LI itself in it, this means we have a loop where
- // at least one of the values is LI. Since this means that we won't be able
- // to eliminate LI even if we insert uses in the other predecessors, we will
- // end up increasing code size. Reject this by scanning for LI.
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
- if (ValuesPerBlock[i].isSimpleValue() &&
- ValuesPerBlock[i].getSimpleValue() == LI) {
- // Skip cases where LI is the only definition, even for EnableFullLoadPRE.
- if (!EnableFullLoadPRE || e == 1)
- return false;
- }
- }
-
// FIXME: It is extremely unclear what this loop is doing, other than
// artificially restricting loadpre.
if (isSinglePred) {
@@ -1612,14 +1321,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
unsigned NumUnavailablePreds = PredLoads.size();
assert(NumUnavailablePreds != 0 &&
"Fully available value should be eliminated above!");
- if (!EnableFullLoadPRE) {
- // If this load is unavailable in multiple predecessors, reject it.
- // FIXME: If we could restructure the CFG, we could make a common pred with
- // all the preds that don't have an available LI and insert a new load into
- // that one block.
- if (NumUnavailablePreds != 1)
+
+ // If this load is unavailable in multiple predecessors, reject it.
+ // FIXME: If we could restructure the CFG, we could make a common pred with
+ // all the preds that don't have an available LI and insert a new load into
+ // that one block.
+ if (NumUnavailablePreds != 1)
return false;
- }
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
@@ -1634,7 +1342,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// If all preds have a single successor, then we know it is safe to insert
// the load on the pred (?!?), so we can insert code to materialize the
// pointer if it is not available.
- PHITransAddr Address(LI->getOperand(0), TD);
+ PHITransAddr Address(LI->getPointerOperand(), TD);
Value *LoadPtr = 0;
if (allSingleSucc) {
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
@@ -1648,7 +1356,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// we fail PRE.
if (LoadPtr == 0) {
DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
- << *LI->getOperand(0) << "\n");
+ << *LI->getPointerOperand() << "\n");
CanDoPRE = false;
break;
}
@@ -1657,8 +1365,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// @1 = getelementptr (i8* p, ...
// test p and branch if == 0
// load @1
- // It is valid to have the getelementptr before the test, even if p can be 0,
- // as getelementptr only does address arithmetic.
+ // It is valid to have the getelementptr before the test, even if p can
+ // be 0, as getelementptr only does address arithmetic.
// If we are not pushing the value through any multiple-successor blocks
// we do not have this case. Otherwise, check that the load is safe to
// put anywhere; this can be improved, but should be conservatively safe.
@@ -1675,8 +1383,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
}
if (!CanDoPRE) {
- while (!NewInsts.empty())
- NewInsts.pop_back_val()->eraseFromParent();
+ while (!NewInsts.empty()) {
+ Instruction *I = NewInsts.pop_back_val();
+ if (MD) MD->removeInstruction(I);
+ I->eraseFromParent();
+ }
return false;
}
@@ -1702,9 +1413,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
BasicBlock *UnavailablePred = I->first;
Value *LoadPtr = I->second;
- Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
- LI->getAlignment(),
- UnavailablePred->getTerminator());
+ Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
+ LI->getAlignment(),
+ UnavailablePred->getTerminator());
+
+ // Transfer the old load's TBAA tag to the new load.
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
+ NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
// Add the newly created load.
ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
@@ -1753,19 +1468,19 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// access code.
Value *AvailVal = 0;
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
- if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+ if (TD) {
int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
L->getPointerOperand(),
DepSI, *TD);
if (Offset != -1)
- AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
+ AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
L->getType(), L, *TD);
}
// If the clobbering value is a memset/memcpy/memmove, see if we can forward
// a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
- if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+ if (TD) {
int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
L->getPointerOperand(),
DepMI, *TD);
@@ -1804,14 +1519,13 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
- Value *StoredVal = DepSI->getOperand(0);
+ Value *StoredVal = DepSI->getValueOperand();
// The store and load are to a must-aliased pointer, but they may not
// actually have the same type. See if we know how to reuse the stored
// value (depending on its type).
- const TargetData *TD = 0;
if (StoredVal->getType() != L->getType()) {
- if ((TD = getAnalysisIfAvailable<TargetData>())) {
+ if (TD) {
StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
L, *TD);
if (StoredVal == 0)
@@ -1840,9 +1554,8 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// The loads are of a must-aliased pointer, but they may not actually have
// the same type. See if we know how to reuse the previously loaded value
// (depending on its type).
- const TargetData *TD = 0;
if (DepLI->getType() != L->getType()) {
- if ((TD = getAnalysisIfAvailable<TargetData>())) {
+ if (TD) {
AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
if (AvailableVal == 0)
return false;
@@ -1890,20 +1603,32 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
return false;
}
-Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
- DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
- if (I == localAvail.end())
- return 0;
-
- ValueNumberScope *Locals = I->second;
- while (Locals) {
- DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
- if (I != Locals->table.end())
- return I->second;
- Locals = Locals->parent;
+// findLeader - In order to find a leader for a given value number at a
+// specific basic block, we first obtain the list of all Values for that number,
+// and then scan the list to find one whose block dominates the block in
+// question. This is fast because dominator tree queries consist of only
+// a few comparisons of DFS numbers.
+Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
+ LeaderTableEntry Vals = LeaderTable[num];
+ if (!Vals.Val) return 0;
+
+ Value *Val = 0;
+ if (DT->dominates(Vals.BB, BB)) {
+ Val = Vals.Val;
+ if (isa<Constant>(Val)) return Val;
+ }
+
+ LeaderTableEntry* Next = Vals.Next;
+ while (Next) {
+ if (DT->dominates(Next->BB, BB)) {
+ if (isa<Constant>(Next->Val)) return Next->Val;
+ if (!Val) Val = Next->Val;
+ }
+
+ Next = Next->Next;
}
- return 0;
+ return Val;
}
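
A stand-alone sketch of the scan findLeader performs over a leader chain, with a toy dominance oracle in place of the DFS-number query (illustrative only):

    // One node in a per-value-number chain, mirroring LeaderTableEntry.
    struct Entry {
      const char *Val;
      int BB;              // toy block id
      bool IsConstant;
      Entry *Next;
    };

    // Toy dominance: in this linear model a lower id dominates a higher one.
    static bool dominates(int A, int B) { return A <= B; }

    static const char *findLeader(Entry *Head, int BB) {
      const char *Val = 0;
      for (Entry *E = Head; E; E = E->Next) {
        if (!dominates(E->BB, BB))
          continue;
        if (E->IsConstant)
          return E->Val;   // a dominating constant is always the best leader
        if (!Val)
          Val = E->Val;    // otherwise keep the first dominating definition
      }
      return Val;
    }

    int main() {
      Entry B = { "%b", 3, false, 0 };
      Entry A = { "%a", 1, false, &B };
      return findLeader(&A, 4) ? 0 : 1;  // finds "%a"
    }
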
@@ -1915,85 +1640,92 @@ bool GVN::processInstruction(Instruction *I,
if (isa<DbgInfoIntrinsic>(I))
return false;
+ // If the instruction can be easily simplified then do so now in preference
+ // to value numbering it. Value numbering often exposes redundancies, for
+ // example if it determines that %y is equal to %x then the instruction
+ // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
+ if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ I->replaceAllUsesWith(V);
+ if (MD && V->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ VN.erase(I);
+ toErase.push_back(I);
+ return true;
+ }
+
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
bool Changed = processLoad(LI, toErase);
if (!Changed) {
unsigned Num = VN.lookup_or_add(LI);
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
+ addToLeaderTable(Num, LI, LI->getParent());
}
return Changed;
}
- uint32_t NextNum = VN.getNextUnusedValueNumber();
- unsigned Num = VN.lookup_or_add(I);
-
+ // For conditional branches, we can perform simple conditional propagation on
+ // the condition value itself.
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
-
if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
return false;
-
+
Value *BranchCond = BI->getCondition();
uint32_t CondVN = VN.lookup_or_add(BranchCond);
-
+
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
-
+
if (TrueSucc->getSinglePredecessor())
- localAvail[TrueSucc]->table[CondVN] =
- ConstantInt::getTrue(TrueSucc->getContext());
+ addToLeaderTable(CondVN,
+ ConstantInt::getTrue(TrueSucc->getContext()),
+ TrueSucc);
if (FalseSucc->getSinglePredecessor())
- localAvail[FalseSucc]->table[CondVN] =
- ConstantInt::getFalse(TrueSucc->getContext());
-
+ addToLeaderTable(CondVN,
+ ConstantInt::getFalse(TrueSucc->getContext()),
+ FalseSucc);
+
return false;
+ }
+
+ // Instructions with void type don't return a value, so there's
+ // no point in trying to find redundancies in them.
+ if (I->getType()->isVoidTy()) return false;
+
+ uint32_t NextNum = VN.getNextUnusedValueNumber();
+ unsigned Num = VN.lookup_or_add(I);
// Allocations are always uniquely numbered, so we can save time and memory
// by fast failing them.
- } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+ if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
+ addToLeaderTable(Num, I, I->getParent());
return false;
}
- // Collapse PHI nodes
- if (PHINode* p = dyn_cast<PHINode>(I)) {
- Value *constVal = CollapsePhi(p);
-
- if (constVal) {
- p->replaceAllUsesWith(constVal);
- if (MD && constVal->getType()->isPointerTy())
- MD->invalidateCachedPointerInfo(constVal);
- VN.erase(p);
-
- toErase.push_back(p);
- } else {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
- }
-
// If the number we were assigned was a brand new VN, then we don't
// need to do a lookup to see if the number already exists
// somewhere in the domtree: it can't!
- } else if (Num == NextNum) {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
-
+ if (Num == NextNum) {
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
+ }
+
// Perform fast-path value-number based elimination of values inherited from
// dominators.
- } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
- // Remove it!
- VN.erase(I);
- I->replaceAllUsesWith(repl);
- if (MD && repl->getType()->isPointerTy())
- MD->invalidateCachedPointerInfo(repl);
- toErase.push_back(I);
- return true;
-
- } else {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+ Value *repl = findLeader(I->getParent(), Num);
+ if (repl == 0) {
+ // Failure, just remember this instance for future use.
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
}
-
- return false;
+
+ // Remove it!
+ VN.erase(I);
+ I->replaceAllUsesWith(repl);
+ if (MD && repl->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(repl);
+ toErase.push_back(I);
+ return true;
}
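
The branch handling above records a constant leader for the condition in each single-predecessor successor, so later uses of the condition in those blocks fold away. A plain-C++ model of that bookkeeping (block ids and value numbers are made up for illustration):

    #include <cstdio>
    #include <map>
    #include <stdint.h>
    #include <utility>

    int main() {
      // (value number, block id) -> constant known to hold in that block
      std::map<std::pair<uint32_t, int>, bool> KnownConstant;

      const uint32_t CondVN = 7;             // value number of %c
      const int TrueSucc = 1, FalseSucc = 2;

      // After "br i1 %c, label %T, label %F", and only when each successor
      // has the branch block as its sole predecessor:
      KnownConstant[std::make_pair(CondVN, TrueSucc)] = true;
      KnownConstant[std::make_pair(CondVN, FalseSucc)] = false;

      std::printf("%%c in the true successor folds to %d\n",
                  (int)KnownConstant[std::make_pair(CondVN, TrueSucc)]);
      return 0;
    }
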
/// runOnFunction - This is the main transformation entry point for a function.
@@ -2001,6 +1733,7 @@ bool GVN::runOnFunction(Function& F) {
if (!NoLoads)
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
VN.setDomTree(DT);
@@ -2011,8 +1744,8 @@ bool GVN::runOnFunction(Function& F) {
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
- BasicBlock *BB = FI;
- ++FI;
+ BasicBlock *BB = FI++;
+
bool removedBlock = MergeBlockIntoPredecessor(BB, this);
if (removedBlock) ++NumGVNBlocks;
@@ -2020,7 +1753,6 @@ bool GVN::runOnFunction(Function& F) {
}
unsigned Iteration = 0;
-
while (ShouldContinue) {
DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
ShouldContinue = iterateOnFunction(F);
@@ -2138,20 +1870,19 @@ bool GVN::performPRE(Function &F) {
if (P == CurrentBlock) {
NumWithout = 2;
break;
- } else if (!localAvail.count(P)) {
+ } else if (!DT->dominates(&F.getEntryBlock(), P)) {
NumWithout = 2;
break;
}
- DenseMap<uint32_t, Value*>::iterator predV =
- localAvail[P]->table.find(ValNo);
- if (predV == localAvail[P]->table.end()) {
+ Value* predV = findLeader(P, ValNo);
+ if (predV == 0) {
PREPred = P;
++NumWithout;
- } else if (predV->second == CurInst) {
+ } else if (predV == CurInst) {
NumWithout = 2;
} else {
- predMap[P] = predV->second;
+ predMap[P] = predV;
++NumWith;
}
}
@@ -2186,7 +1917,7 @@ bool GVN::performPRE(Function &F) {
if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
continue;
- if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
+ if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
PREInstr->setOperand(i, V);
} else {
success = false;
@@ -2210,7 +1941,7 @@ bool GVN::performPRE(Function &F) {
++NumGVNPRE;
// Update the availability map to include the new instruction.
- localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
+ addToLeaderTable(ValNo, PREInstr, PREPred);
// Create a PHI to make the value available in this block.
PHINode* Phi = PHINode::Create(CurInst->getType(),
@@ -2223,12 +1954,21 @@ bool GVN::performPRE(Function &F) {
}
VN.add(Phi, ValNo);
- localAvail[CurrentBlock]->table[ValNo] = Phi;
+ addToLeaderTable(ValNo, Phi, CurrentBlock);
CurInst->replaceAllUsesWith(Phi);
- if (MD && Phi->getType()->isPointerTy())
- MD->invalidateCachedPointerInfo(Phi);
+ if (Phi->getType()->isPointerTy()) {
+ // Because we have added a PHI-use of the pointer value, it has now
+ // "escaped" from alias analysis' perspective. We need to inform
+ // AA of this.
+ for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii)
+ VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii));
+
+ if (MD)
+ MD->invalidateCachedPointerInfo(Phi);
+ }
VN.erase(CurInst);
+ removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
if (MD) MD->removeInstruction(CurInst);
@@ -2260,16 +2000,7 @@ bool GVN::splitCriticalEdges() {
/// iterateOnFunction - Executes one iteration of GVN
bool GVN::iterateOnFunction(Function &F) {
cleanupGlobalSets();
-
- for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
- DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
- if (DI->getIDom())
- localAvail[DI->getBlock()] =
- new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
- else
- localAvail[DI->getBlock()] = new ValueNumberScope(0);
- }
-
+
// Top-down walk of the dominator tree
bool Changed = false;
#if 0
@@ -2289,11 +2020,8 @@ bool GVN::iterateOnFunction(Function &F) {
void GVN::cleanupGlobalSets() {
VN.clear();
-
- for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
- I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
- delete I->second;
- localAvail.clear();
+ LeaderTable.clear();
+ TableAllocator.Reset();
}
/// verifyRemoved - Verify that the specified instruction does not occur in our
@@ -2303,17 +2031,14 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
// Walk through the value number scope to make sure the instruction isn't
// ferreted away in it.
- for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
- I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
- const ValueNumberScope *VNS = I->second;
-
- while (VNS) {
- for (DenseMap<uint32_t, Value*>::const_iterator
- II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
- assert(II->second != Inst && "Inst still in value numbering scope!");
- }
-
- VNS = VNS->parent;
+ for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
+ I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
+ const LeaderTableEntry *Node = &I->second;
+ assert(Node->Val != Inst && "Inst still in value numbering scope!");
+
+ while (Node->Next) {
+ Node = Node->Next;
+ assert(Node->Val != Inst && "Inst still in value numbering scope!");
}
}
}
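
For reference, the LeaderTable that replaces the per-block ValueNumberScope maps is keyed by value number and chains extra (value, defining-block) entries off the in-map head node; chain nodes come from a bump allocator so cleanupGlobalSets can drop everything at once instead of deleting per-block scopes. A rough stand-in (plain C++; LLVM itself uses DenseMap and BumpPtrAllocator, and findLeader filters candidates by dominance rather than block equality, which is simplified away here):

    #include <cstdint>
    #include <deque>
    #include <unordered_map>

    struct Value {};       // opaque stand-ins for llvm::Value / llvm::BasicBlock
    struct BasicBlock {};

    struct LeaderEntry {
      Value *Val;
      BasicBlock *BB;
      LeaderEntry *Next;
    };

    class LeaderTable {
      std::unordered_map<uint32_t, LeaderEntry> Table;
      std::deque<LeaderEntry> Arena;  // stand-in for the bump allocator

    public:
      void add(uint32_t Num, Value *V, BasicBlock *BB) {
        LeaderEntry &Head = Table[Num];  // value-initialized to nulls on insert
        if (!Head.Val) {                 // first entry lives inline in the map
          Head.Val = V;
          Head.BB = BB;
          return;
        }
        Arena.push_back(LeaderEntry{V, BB, Head.Next});  // chain off the head
        Head.Next = &Arena.back();       // deque never relocates old elements
      }

      Value *findFirst(uint32_t Num, BasicBlock *BB) const {
        auto It = Table.find(Num);
        for (const LeaderEntry *N = It == Table.end() ? nullptr : &It->second;
             N; N = N->Next)
          if (N->BB == BB)               // the real GVN checks dominance instead
            return N->Val;
        return nullptr;
      }

      void clear() {  // mirrors LeaderTable.clear() + TableAllocator.Reset()
        Table.clear();
        Arena.clear();
      }
    };

    int main() {
      Value V1, V2;
      BasicBlock B;
      LeaderTable T;
      T.add(7, &V1, &B);
      T.add(7, &V2, &B);
      return T.findFirst(7, &B) == &V1 ? 0 : 1;
    }
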
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index af2eafc..0fb6798 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -77,7 +77,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID) {}
+ IndVarSimplify() : LoopPass(ID) {
+ initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -117,8 +119,16 @@ namespace {
}
char IndVarSimplify::ID = 0;
-INITIALIZE_PASS(IndVarSimplify, "indvars",
- "Canonicalize Induction Variables", false, false);
+INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
+ "Canonicalize Induction Variables", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_END(IndVarSimplify, "indvars",
+ "Canonicalize Induction Variables", false, false)
Pass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
@@ -190,7 +200,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
}
// Expand the code for the iteration count.
- assert(RHS->isLoopInvariant(L) &&
+ assert(SE->isLoopInvariant(RHS, L) &&
"Computed iteration count is not loop invariant!");
Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
@@ -233,8 +243,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
-void IndVarSimplify::RewriteLoopExitValues(Loop *L,
- SCEVExpander &Rewriter) {
+void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// Verify the input to the pass is already in LCSSA form.
assert(L->isLCSSAForm(*DT));
@@ -292,7 +301,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
// and varies predictably *inside* the loop. Evaluate the value it
// contains when the loop exits, if possible.
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
- if (!ExitValue->isLoopInvariant(L))
+ if (!SE->isLoopInvariant(ExitValue, L))
continue;
Changed = true;
@@ -338,7 +347,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
// If there are, change them into integer recurrences, permitting analysis by
// the SCEV routines.
//
- BasicBlock *Header = L->getHeader();
+ BasicBlock *Header = L->getHeader();
SmallVector<WeakVH, 8> PHIs;
for (BasicBlock::iterator I = Header->begin();
@@ -346,7 +355,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
PHIs.push_back(PN);
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
- if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i]))
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
HandleFloatingPointIV(L, PN);
// If the loop previously had floating-point IV, ScalarEvolution
@@ -395,7 +404,7 @@ void IndVarSimplify::EliminateIVComparisons() {
// which are now dead.
while (!DeadInsts.empty())
if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
@@ -462,7 +471,7 @@ void IndVarSimplify::EliminateIVRemainders() {
// which are now dead.
while (!DeadInsts.empty())
if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
@@ -607,9 +616,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// currently can only reduce affine polynomials. For now just disable
// indvar subst on anything more complex than an affine addrec, unless
// it can be expanded to a trivial value.
-static bool isSafe(const SCEV *S, const Loop *L) {
+static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
// Loop-invariant values are safe.
- if (S->isLoopInvariant(L)) return true;
+ if (SE->isLoopInvariant(S, L)) return true;
// Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
// to transform them into efficient code.
@@ -620,18 +629,18 @@ static bool isSafe(const SCEV *S, const Loop *L) {
if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
E = Commutative->op_end(); I != E; ++I)
- if (!isSafe(*I, L)) return false;
+ if (!isSafe(*I, L, SE)) return false;
return true;
}
// A cast is safe if its operand is.
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
- return isSafe(C->getOperand(), L);
+ return isSafe(C->getOperand(), L, SE);
// A udiv is safe if its operands are.
if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
- return isSafe(UD->getLHS(), L) &&
- isSafe(UD->getRHS(), L);
+ return isSafe(UD->getLHS(), L, SE) &&
+ isSafe(UD->getRHS(), L, SE);
// SCEVUnknown is always safe.
if (isa<SCEVUnknown>(S))
@@ -662,7 +671,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// Evaluate the expression out of the loop, if possible.
if (!L->contains(UI->getUser())) {
const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
- if (ExitVal->isLoopInvariant(L))
+ if (SE->isLoopInvariant(ExitVal, L))
AR = ExitVal;
}
@@ -672,7 +681,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// currently can only reduce affine polynomials. For now just disable
// indvar subst on anything more complex than an affine addrec, unless
// it can be expanded to a trivial value.
- if (!isSafe(AR, L))
+ if (!isSafe(AR, L, SE))
continue;
// Determine the insertion point for this user. By default, insert
@@ -725,7 +734,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// which are now dead.
while (!DeadInsts.empty())
if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
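
The repeated mechanical change in this file is an API migration: S->isLoopInvariant(L) becomes SE->isLoopInvariant(S, L), moving the query from the SCEV node onto the ScalarEvolution analysis. As a consequence, every recursive helper that asks the question, such as isSafe, grows an SE parameter that must be forwarded through each recursive call. A minimal sketch of that shape (toy types, not the real SCEV hierarchy):

    struct Loop {};
    struct SCEV { const SCEV *Op = nullptr; };  // toy: at most one operand
    struct ScalarEvolution {
      bool isLoopInvariant(const SCEV *, const Loop *) const { return false; }
    };

    // Before: static bool isSafe(const SCEV *S, const Loop *L);
    static bool isSafe(const SCEV *S, const Loop *L, const ScalarEvolution *SE) {
      if (SE->isLoopInvariant(S, L))        // query moved onto the analysis
        return true;
      return S->Op && isSafe(S->Op, L, SE); // recurse, forwarding SE
    }

    int main() {
      ScalarEvolution SE;
      Loop L;
      SCEV Inner, Outer;
      Outer.Op = &Inner;
      return isSafe(&Outer, &L, &SE) ? 1 : 0;  // 0 here: the toy SE knows nothing
    }
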
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 104d5ae..90094a8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -40,20 +40,22 @@ STATISTIC(NumFolds, "Number of terminators folded");
STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
static cl::opt<unsigned>
-Threshold("jump-threading-threshold",
+Threshold("jump-threading-threshold",
cl::desc("Max block size to duplicate for jump threading"),
cl::init(6), cl::Hidden);
-// Turn on use of LazyValueInfo.
-static cl::opt<bool>
-EnableLVI("enable-jump-threading-lvi",
- cl::desc("Use LVI for jump threading"),
- cl::init(true),
- cl::ReallyHidden);
-
-
-
namespace {
+ // These are at global scope so static functions can use them too.
+ typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
+ typedef SmallVector<std::pair<Constant*, BasicBlock*>, 8> PredValueInfoTy;
+
+ // This is used to keep track of what kind of constant we're currently hoping
+ // to find.
+ enum ConstantPreference {
+ WantInteger,
+ WantBlockAddress
+ };
+
/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
/// predecessors of the block can be proven to always jump to one of the
@@ -79,61 +81,59 @@ namespace {
SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
#endif
DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
-
+
// RAII helper for updating the recursion stack.
struct RecursionSetRemover {
DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
std::pair<Value*, BasicBlock*> ThePair;
-
+
RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
std::pair<Value*, BasicBlock*> P)
: TheSet(S), ThePair(P) { }
-
+
~RecursionSetRemover() {
TheSet.erase(ThePair);
}
};
public:
static char ID; // Pass identification
- JumpThreading() : FunctionPass(ID) {}
+ JumpThreading() : FunctionPass(ID) {
+ initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- if (EnableLVI) {
- AU.addRequired<LazyValueInfo>();
- AU.addPreserved<LazyValueInfo>();
- }
+ AU.addRequired<LazyValueInfo>();
+ AU.addPreserved<LazyValueInfo>();
}
-
+
void FindLoopHeaders(Function &F);
bool ProcessBlock(BasicBlock *BB);
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB);
bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
const SmallVectorImpl<BasicBlock *> &PredBBs);
-
- typedef SmallVectorImpl<std::pair<ConstantInt*,
- BasicBlock*> > PredValueInfo;
-
+
bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
- PredValueInfo &Result);
- bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB);
-
-
- bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
- bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
+ PredValueInfo &Result,
+ ConstantPreference Preference);
+ bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference);
bool ProcessBranchOnPHI(PHINode *PN);
bool ProcessBranchOnXOR(BinaryOperator *BO);
-
+
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
};
}
char JumpThreading::ID = 0;
-INITIALIZE_PASS(JumpThreading, "jump-threading",
- "Jump Threading", false, false);
+INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
+ "Jump Threading", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_END(JumpThreading, "jump-threading",
+ "Jump Threading", false, false)
// Public interface to the Jump Threading pass
FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
@@ -143,21 +143,21 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
- LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0;
-
+ LVI = &getAnalysis<LazyValueInfo>();
+
FindLoopHeaders(F);
-
+
bool Changed, EverChanged = false;
do {
Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
BasicBlock *BB = I;
- // Thread all of the branches we can over this block.
+ // Thread all of the branches we can over this block.
while (ProcessBlock(BB))
Changed = true;
-
+
++I;
-
+
// If the block is trivially dead, zap it. This eliminates the successor
// edges which simplifies the CFG.
if (pred_begin(BB) == pred_end(BB) &&
@@ -165,48 +165,46 @@ bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
<< "' with terminator: " << *BB->getTerminator() << '\n');
LoopHeaders.erase(BB);
- if (LVI) LVI->eraseBlock(BB);
+ LVI->eraseBlock(BB);
DeleteDeadBlock(BB);
Changed = true;
- } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- // Can't thread an unconditional jump, but if the block is "almost
- // empty", we can replace uses of it with uses of the successor and make
- // this dead.
- if (BI->isUnconditional() &&
- BB != &BB->getParent()->getEntryBlock()) {
- BasicBlock::iterator BBI = BB->getFirstNonPHI();
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(BBI))
- ++BBI;
+ continue;
+ }
+
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+
+ // Can't thread an unconditional jump, but if the block is "almost
+ // empty", we can replace uses of it with uses of the successor and make
+ // this dead.
+ if (BI && BI->isUnconditional() &&
+ BB != &BB->getParent()->getEntryBlock() &&
// If the terminator is the only non-phi instruction, try to nuke it.
- if (BBI->isTerminator()) {
- // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
- // block, we have to make sure it isn't in the LoopHeaders set. We
- // reinsert afterward if needed.
- bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
- BasicBlock *Succ = BI->getSuccessor(0);
-
- // FIXME: It is always conservatively correct to drop the info
- // for a block even if it doesn't get erased. This isn't totally
- // awesome, but it allows us to use AssertingVH to prevent nasty
- // dangling pointer issues within LazyValueInfo.
- if (LVI) LVI->eraseBlock(BB);
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
- Changed = true;
- // If we deleted BB and BB was the header of a loop, then the
- // successor is now the header of the loop.
- BB = Succ;
- }
-
- if (ErasedFromLoopHeaders)
- LoopHeaders.insert(BB);
- }
+ BB->getFirstNonPHIOrDbg()->isTerminator()) {
+ // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
+ // block, we have to make sure it isn't in the LoopHeaders set. We
+ // reinsert afterward if needed.
+ bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+ BasicBlock *Succ = BI->getSuccessor(0);
+
+ // FIXME: It is always conservatively correct to drop the info
+ // for a block even if it doesn't get erased. This isn't totally
+ // awesome, but it allows us to use AssertingVH to prevent nasty
+ // dangling pointer issues within LazyValueInfo.
+ LVI->eraseBlock(BB);
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
+ Changed = true;
+ // If we deleted BB and BB was the header of a loop, then the
+ // successor is now the header of the loop.
+ BB = Succ;
}
+
+ if (ErasedFromLoopHeaders)
+ LoopHeaders.insert(BB);
}
}
EverChanged |= Changed;
} while (Changed);
-
+
LoopHeaders.clear();
return EverChanged;
}
@@ -216,25 +214,25 @@ bool JumpThreading::runOnFunction(Function &F) {
static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
/// Ignore PHI nodes; these will be flattened when duplication happens.
BasicBlock::const_iterator I = BB->getFirstNonPHI();
-
+
// FIXME: THREADING will delete values that are just used to compute the
// branch, so they shouldn't count against the duplication cost.
-
-
+
+
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
for (; !isa<TerminatorInst>(I); ++I) {
// Debugger intrinsics don't incur code size.
if (isa<DbgInfoIntrinsic>(I)) continue;
-
+
// If this is a pointer->pointer bitcast, it is free.
if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
continue;
-
+
// All other instructions count for at least one unit.
++Size;
-
+
// Calls are more expensive. If they are non-intrinsic calls, we model them
// as having cost of 4. If they are a non-vector intrinsic, we model them
// as having cost of 2 total, and if they are a vector intrinsic, we model
@@ -246,12 +244,16 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
Size += 1;
}
}
-
+
// Threading through a switch statement is particularly profitable. If this
// block ends in a switch, decrease its cost to make it more likely to happen.
if (isa<SwitchInst>(I))
Size = Size > 6 ? Size-6 : 0;
-
+
+ // The same holds for indirect branches, but slightly more so.
+ if (isa<IndirectBrInst>(I))
+ Size = Size > 8 ? Size-8 : 0;
+
return Size;
}
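
As a worked example of the two discounts above: a block with nine countable instructions costs 3 if it ends in a switch (9 - 6) and 1 if it ends in an indirect branch (9 - 8), which is what makes such blocks more likely to fall under the duplication threshold. A self-contained check of the arithmetic:

    #include <cassert>

    int main() {
      unsigned Size = 9;  // nine instructions that each count one unit
      unsigned IfSwitch = Size > 6 ? Size - 6 : 0;    // block ends in a switch
      unsigned IfIndirect = Size > 8 ? Size - 8 : 0;  // ...or in an indirectbr
      assert(IfSwitch == 3 && IfIndirect == 1);
      return 0;
    }
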
@@ -273,57 +275,64 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
void JumpThreading::FindLoopHeaders(Function &F) {
SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
FindFunctionBackedges(F, Edges);
-
+
for (unsigned i = 0, e = Edges.size(); i != e; ++i)
LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
}
-// Helper method for ComputeValueKnownInPredecessors. If Value is a
-// ConstantInt, push it. If it's an undef, push 0. Otherwise, do nothing.
-static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*,
- BasicBlock*> > &Result,
- Constant *Value, BasicBlock* BB){
- if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value))
- Result.push_back(std::make_pair(FoldedCInt, BB));
- else if (isa<UndefValue>(Value))
- Result.push_back(std::make_pair((ConstantInt*)0, BB));
+/// getKnownConstant - Helper method to determine if we can thread over a
+/// terminator with the given value as its condition, and if so what value to
+/// use for that. What kind of value this is depends on whether we want an
+/// integer or a block address, but an undef is always accepted.
+/// Returns null if Val is null or not an appropriate constant.
+static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
+ if (!Val)
+ return 0;
+
+ // Undef is "known" enough.
+ if (UndefValue *U = dyn_cast<UndefValue>(Val))
+ return U;
+
+ if (Preference == WantBlockAddress)
+ return dyn_cast<BlockAddress>(Val->stripPointerCasts());
+
+ return dyn_cast<ConstantInt>(Val);
}
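
getKnownConstant is the single gatekeeper for both threading modes: undef is accepted under either preference, a block address (after stripping pointer casts) only when WantBlockAddress is requested, and a ConstantInt otherwise. A self-contained toy of that dispatch (stand-in kinds, not LLVM types):

    #include <cassert>

    enum Kind { KInt, KBlockAddr, KUndef, KOther };
    enum ConstantPreference { WantInteger, WantBlockAddress };

    static bool isKnownConstant(Kind K, ConstantPreference P) {
      if (K == KUndef) return true;                       // undef is "known" enough
      if (P == WantBlockAddress) return K == KBlockAddr;  // indirectbr case
      return K == KInt;                                   // branch/switch case
    }

    int main() {
      assert(isKnownConstant(KUndef, WantInteger));
      assert(isKnownConstant(KBlockAddr, WantBlockAddress));
      assert(!isKnownConstant(KBlockAddr, WantInteger));
      assert(!isKnownConstant(KOther, WantBlockAddress));
      return 0;
    }
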
/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
-/// if we can infer that the value is a known ConstantInt in any of our
-/// predecessors. If so, return the known list of value and pred BB in the
-/// result vector. If a value is known to be undef, it is returned as null.
+/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
+/// in any of our predecessors. If so, return the known list of value and pred
+/// BB in the result vector.
///
/// This returns true if there were any known values.
///
bool JumpThreading::
-ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
+ ConstantPreference Preference) {
// This method walks up use-def chains recursively. Because of this, we could
// get into an infinite loop going around loops in the use-def chain. To
// prevent this, keep track of what (value, block) pairs we've already visited
// and terminate the search if we loop back to them
if (!RecursionSet.insert(std::make_pair(V, BB)).second)
return false;
-
+
// An RAII helper to remove this pair from the recursion set once the recursion
// stack pops back out again.
RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
-
- // If V is a constantint, then it is known in all predecessors.
- if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
- ConstantInt *CI = dyn_cast<ConstantInt>(V);
-
+
+ // If V is a constant, then it is known in all predecessors.
+ if (Constant *KC = getKnownConstant(V, Preference)) {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Result.push_back(std::make_pair(CI, *PI));
-
+ Result.push_back(std::make_pair(KC, *PI));
+
return true;
}
-
+
// If V is a non-instruction value, or an instruction in a different block,
// then it can't be derived from a PHI.
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0 || I->getParent() != BB) {
-
+
// Okay, if this is a live-in value, see if it has a known value at the end
// of any of our predecessors.
//
@@ -331,82 +340,78 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
/// TODO: Per PR2563, we could infer value range information about a
/// predecessor based on its terminator.
//
- if (LVI) {
- // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
- // "I" is a non-local compare-with-a-constant instruction. This would be
- // able to handle value inequalities better, for example if the compare is
- // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
- // Perhaps getConstantOnEdge should be smart enough to do this?
-
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
- // If the value is known by LazyValueInfo to be a constant in a
- // predecessor, use that information to try to thread this block.
- Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
- if (PredCst == 0 ||
- (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
- continue;
-
- Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P));
- }
-
- return !Result.empty();
+ // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
+ // "I" is a non-local compare-with-a-constant instruction. This would be
+ // able to handle value inequalities better, for example if the compare is
+ // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+ // Perhaps getConstantOnEdge should be smart enough to do this?
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
+ if (Constant *KC = getKnownConstant(PredCst, Preference))
+ Result.push_back(std::make_pair(KC, P));
}
-
- return false;
+
+ return !Result.empty();
}
-
+
/// If I is a PHI node, then we know the incoming values for any constants.
if (PHINode *PN = dyn_cast<PHINode>(I)) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *InVal = PN->getIncomingValue(i);
- if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
- ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
- Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
- } else if (LVI) {
+ if (Constant *KC = getKnownConstant(InVal, Preference)) {
+ Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
+ } else {
Constant *CI = LVI->getConstantOnEdge(InVal,
PN->getIncomingBlock(i), BB);
- // LVI returns null is no value could be determined.
- if (!CI) continue;
- PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i));
+ if (Constant *KC = getKnownConstant(CI, Preference))
+ Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
}
}
-
+
return !Result.empty();
}
-
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals, RHSVals;
+
+ PredValueInfoTy LHSVals, RHSVals;
// Handle some boolean conditions.
- if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ assert(Preference == WantInteger && "One-bit non-integer type?");
// X | true -> true
// X & false -> false
if (I->getOpcode() == Instruction::Or ||
I->getOpcode() == Instruction::And) {
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
- ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals);
-
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+ WantInteger);
+ ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
+ WantInteger);
+
if (LHSVals.empty() && RHSVals.empty())
return false;
-
+
ConstantInt *InterestingVal;
if (I->getOpcode() == Instruction::Or)
InterestingVal = ConstantInt::getTrue(I->getContext());
else
InterestingVal = ConstantInt::getFalse(I->getContext());
-
+
SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
-
+
// Scan for the sentinel. If we find an undef, force it to the
// interesting value: x|undef -> true and x&undef -> false.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
- if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) {
+ if (LHSVals[i].first == InterestingVal ||
+ isa<UndefValue>(LHSVals[i].first)) {
Result.push_back(LHSVals[i]);
Result.back().first = InterestingVal;
LHSKnownBBs.insert(LHSVals[i].second);
}
for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
- if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
+ if (RHSVals[i].first == InterestingVal ||
+ isa<UndefValue>(RHSVals[i].first)) {
// If we already inferred a value for this block on the LHS, don't
// re-add it.
if (!LHSKnownBBs.count(RHSVals[i].second)) {
@@ -414,48 +419,51 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
Result.back().first = InterestingVal;
}
}
-
+
return !Result.empty();
}
-
+
// Handle the NOT form of XOR.
if (I->getOpcode() == Instruction::Xor &&
isa<ConstantInt>(I->getOperand(1)) &&
cast<ConstantInt>(I->getOperand(1))->isOne()) {
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result);
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
+ WantInteger);
if (Result.empty())
return false;
// Invert the known values.
for (unsigned i = 0, e = Result.size(); i != e; ++i)
- if (Result[i].first)
- Result[i].first =
- cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
-
+ Result[i].first = ConstantExpr::getNot(Result[i].first);
+
return true;
}
-
+
// Try to simplify some other binary operator values.
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ assert(Preference != WantBlockAddress
+ && "A binary operator creating a block address?");
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
- ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals);
-
+ PredValueInfoTy LHSVals;
+ ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
+ WantInteger);
+
// Try to use constant folding to simplify the binary operator.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
- Constant *V = LHSVals[i].first ? LHSVals[i].first :
- cast<Constant>(UndefValue::get(BO->getType()));
+ Constant *V = LHSVals[i].first;
Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
-
- PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+
+ if (Constant *KC = getKnownConstant(Folded, WantInteger))
+ Result.push_back(std::make_pair(KC, LHSVals[i].second));
}
}
-
+
return !Result.empty();
}
-
+
// Handle compare with phi operand, where the PHI is defined in this block.
if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ assert(Preference == WantInteger && "Compares only produce integers");
PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
if (PN && PN->getParent() == BB) {
// We can do this simplification if any comparisons fold to true or false.
@@ -464,32 +472,31 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
BasicBlock *PredBB = PN->getIncomingBlock(i);
Value *LHS = PN->getIncomingValue(i);
Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
-
+
Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
if (Res == 0) {
- if (!LVI || !isa<Constant>(RHS))
+ if (!isa<Constant>(RHS))
continue;
-
- LazyValueInfo::Tristate
+
+ LazyValueInfo::Tristate
ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
cast<Constant>(RHS), PredBB, BB);
if (ResT == LazyValueInfo::Unknown)
continue;
Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
}
-
- if (Constant *ConstRes = dyn_cast<Constant>(Res))
- PushConstantIntOrUndef(Result, ConstRes, PredBB);
+
+ if (Constant *KC = getKnownConstant(Res, WantInteger))
+ Result.push_back(std::make_pair(KC, PredBB));
}
-
+
return !Result.empty();
}
-
-
+
+
// If comparing a live-in value against a constant, see if we know the
// live-in value on any predecessors.
- if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
- Cmp->getType()->isIntegerTy()) {
+ if (isa<Constant>(Cmp->getOperand(1)) && Cmp->getType()->isIntegerTy()) {
if (!isa<Instruction>(Cmp->getOperand(0)) ||
cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
@@ -505,44 +512,74 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
continue;
Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
- Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
+ Result.push_back(std::make_pair(ResC, P));
}
return !Result.empty();
}
-
+
// Try to find a constant value for the LHS of a comparison,
// and evaluate it statically if we can.
if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
-
+ PredValueInfoTy LHSVals;
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+ WantInteger);
+
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
- Constant *V = LHSVals[i].first ? LHSVals[i].first :
- cast<Constant>(UndefValue::get(CmpConst->getType()));
+ Constant *V = LHSVals[i].first;
Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
V, CmpConst);
- PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+ if (Constant *KC = getKnownConstant(Folded, WantInteger))
+ Result.push_back(std::make_pair(KC, LHSVals[i].second));
}
-
+
return !Result.empty();
}
}
}
-
- if (LVI) {
- // If all else fails, see if LVI can figure out a constant value for us.
- Constant *CI = LVI->getConstant(V, BB);
- ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI);
- if (CInt) {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Result.push_back(std::make_pair(CInt, *PI));
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ // Handle select instructions where at least one operand is a known constant
+ // and we can figure out the condition value for any predecessor block.
+ Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
+ Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
+ PredValueInfoTy Conds;
+ if ((TrueVal || FalseVal) &&
+ ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
+ WantInteger)) {
+ for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
+ Constant *Cond = Conds[i].first;
+
+ // Figure out what value to use for the condition.
+ bool KnownCond;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
+ // A known boolean.
+ KnownCond = CI->isOne();
+ } else {
+ assert(isa<UndefValue>(Cond) && "Unexpected condition value");
+ // Either operand will do, so be sure to pick the one that's a known
+ // constant.
+ // FIXME: Do this more cleverly if both values are known constants?
+ KnownCond = (TrueVal != 0);
+ }
+
+ // See if the select has a known constant value for this predecessor.
+ if (Constant *Val = KnownCond ? TrueVal : FalseVal)
+ Result.push_back(std::make_pair(Val, Conds[i].second));
+ }
+
+ return !Result.empty();
}
-
- return !Result.empty();
}
-
- return false;
+
+ // If all else fails, see if LVI can figure out a constant value for us.
+ Constant *CI = LVI->getConstant(V, BB);
+ if (Constant *KC = getKnownConstant(CI, Preference)) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(KC, *PI));
+ }
+
+ return !Result.empty();
}
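
The RecursionSet/RecursionSetRemover pair at the top of this function is what keeps the walk over use-def chains from looping: the (value, block) pair is inserted on entry, and an RAII object erases it on every exit path when the frame unwinds. A standalone sketch of the same guard pattern (toy key type):

    #include <set>
    #include <utility>

    using Pair = std::pair<int, int>;  // stand-in for (Value*, BasicBlock*)

    struct ScopedMark {                // analogue of RecursionSetRemover
      std::set<Pair> &Set;
      Pair P;
      ScopedMark(std::set<Pair> &S, Pair V) : Set(S), P(V) {}
      ~ScopedMark() { Set.erase(P); }
    };

    static bool visit(int V, int BB, std::set<Pair> &Seen) {
      if (!Seen.insert({V, BB}).second)
        return false;                  // already on the recursion stack: stop
      ScopedMark Mark(Seen, {V, BB});
      // ... recurse into operands here; the mark is erased on every exit ...
      return true;
    }

    int main() {
      std::set<Pair> Seen;
      bool First = visit(1, 2, Seen);
      bool Again = visit(1, 2, Seen);  // mark was erased, so this succeeds too
      return (First && Again) ? 0 : 1;
    }
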
@@ -565,10 +602,20 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
if (NumPreds < MinNumPreds)
MinSucc = i;
}
-
+
return MinSucc;
}
+static bool hasAddressTakenAndUsed(BasicBlock *BB) {
+ if (!BB->hasAddressTaken()) return false;
+
+ // If the block has its address taken, it may be a tree of dead constants
+ // hanging off of it. These shouldn't keep the block alive.
+ BlockAddress *BA = BlockAddress::get(BB);
+ BA->removeDeadConstantUsers();
+ return !BA->use_empty();
+}
+
/// ProcessBlock - If there are any predecessors whose control can be threaded
/// through to a successor, transform them now.
bool JumpThreading::ProcessBlock(BasicBlock *BB) {
@@ -577,167 +624,122 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (pred_begin(BB) == pred_end(BB) &&
BB != &BB->getParent()->getEntryBlock())
return false;
-
+
// If this block has a single predecessor, and if that pred has a single
// successor, merge the blocks. This encourages recursive jump threading
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
- SinglePred != BB) {
+ SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
// If SinglePred was a loop header, BB becomes one.
if (LoopHeaders.erase(SinglePred))
LoopHeaders.insert(BB);
-
+
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- if (LVI) LVI->eraseBlock(SinglePred);
+ LVI->eraseBlock(SinglePred);
MergeBasicBlockIntoOnlyPred(BB);
-
+
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
return true;
}
}
- // Look to see if the terminator is a branch of switch, if not we can't thread
- // it.
+ // What kind of constant we're looking for.
+ ConstantPreference Preference = WantInteger;
+
+ // Look to see if the terminator is a conditional branch, switch or indirect
+ // branch, if not we can't thread it.
Value *Condition;
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ Instruction *Terminator = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
// Can't thread an unconditional jump.
if (BI->isUnconditional()) return false;
Condition = BI->getCondition();
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
Condition = SI->getCondition();
- else
+ } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
+ Condition = IB->getAddress()->stripPointerCasts();
+ Preference = WantBlockAddress;
+ } else {
return false; // Must be an invoke.
-
- // If the terminator of this block is branching on a constant, simplify the
- // terminator to an unconditional branch. This can occur due to threading in
- // other blocks.
- if (isa<ConstantInt>(Condition)) {
- DEBUG(dbgs() << " In block '" << BB->getName()
- << "' folding terminator: " << *BB->getTerminator() << '\n');
- ++NumFolds;
- ConstantFoldTerminator(BB);
- return true;
}
-
+
// If the terminator is branching on an undef, we can pick any of the
// successors to branch to. Let GetBestDestForJumpOnUndef decide.
if (isa<UndefValue>(Condition)) {
unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
-
+
// Fold the branch/switch.
TerminatorInst *BBTerm = BB->getTerminator();
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
if (i == BestSucc) continue;
- RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
+ BBTerm->getSuccessor(i)->removePredecessor(BB, true);
}
-
+
DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding undef terminator: " << *BBTerm << '\n');
BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
BBTerm->eraseFromParent();
return true;
}
-
- Instruction *CondInst = dyn_cast<Instruction>(Condition);
- // If the condition is an instruction defined in another block, see if a
- // predecessor has the same condition:
- // br COND, BBX, BBY
- // BBX:
- // br COND, BBZ, BBW
- if (!LVI &&
- !Condition->hasOneUse() && // Multiple uses.
- (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
- pred_iterator PI = pred_begin(BB), E = pred_end(BB);
- if (isa<BranchInst>(BB->getTerminator())) {
- for (; PI != E; ++PI) {
- BasicBlock *P = *PI;
- if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
- if (PBI->isConditional() && PBI->getCondition() == Condition &&
- ProcessBranchOnDuplicateCond(P, BB))
- return true;
- }
- } else {
- assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator");
- for (; PI != E; ++PI) {
- BasicBlock *P = *PI;
- if (SwitchInst *PSI = dyn_cast<SwitchInst>(P->getTerminator()))
- if (PSI->getCondition() == Condition &&
- ProcessSwitchOnDuplicateCond(P, BB))
- return true;
- }
- }
+ // If the terminator of this block is branching on a constant, simplify the
+ // terminator to an unconditional branch. This can occur due to threading in
+ // other blocks.
+ if (getKnownConstant(Condition, Preference)) {
+ DEBUG(dbgs() << " In block '" << BB->getName()
+ << "' folding terminator: " << *BB->getTerminator() << '\n');
+ ++NumFolds;
+ ConstantFoldTerminator(BB);
+ return true;
}
+ Instruction *CondInst = dyn_cast<Instruction>(Condition);
+
// All the rest of our checks depend on the condition being an instruction.
if (CondInst == 0) {
// FIXME: Unify this with code below.
- if (LVI && ProcessThreadableEdges(Condition, BB))
+ if (ProcessThreadableEdges(Condition, BB, Preference))
return true;
return false;
- }
-
-
+ }
+
+
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
- if (!LVI &&
- (!isa<PHINode>(CondCmp->getOperand(0)) ||
- cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) {
- // If we have a comparison, loop over the predecessors to see if there is
- // a condition with a lexically identical value.
- pred_iterator PI = pred_begin(BB), E = pred_end(BB);
- for (; PI != E; ++PI) {
- BasicBlock *P = *PI;
- if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
- if (PBI->isConditional() && P != BB) {
- if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
- if (CI->getOperand(0) == CondCmp->getOperand(0) &&
- CI->getOperand(1) == CondCmp->getOperand(1) &&
- CI->getPredicate() == CondCmp->getPredicate()) {
- // TODO: Could handle things like (x != 4) --> (x == 17)
- if (ProcessBranchOnDuplicateCond(P, BB))
- return true;
- }
- }
- }
- }
- }
-
// For a comparison where the LHS is outside this block, it's possible
// that we've branched on it before. Use LVI to see if we can simplify
// the branch based on that.
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- if (LVI && CondBr && CondConst && CondBr->isConditional() && PI != PE &&
+ if (CondBr && CondConst && CondBr->isConditional() && PI != PE &&
(!isa<Instruction>(CondCmp->getOperand(0)) ||
cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) {
// For predecessor edge, determine if the comparison is true or false
// on that edge. If they're all true or all false, we can simplify the
// branch.
// FIXME: We could handle mixed true/false by duplicating code.
- LazyValueInfo::Tristate Baseline =
+ LazyValueInfo::Tristate Baseline =
LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
CondConst, *PI, BB);
if (Baseline != LazyValueInfo::Unknown) {
// Check that all remaining incoming values match the first one.
while (++PI != PE) {
- LazyValueInfo::Tristate Ret = LVI->getPredicateOnEdge(
- CondCmp->getPredicate(),
- CondCmp->getOperand(0),
- CondConst, *PI, BB);
+ LazyValueInfo::Tristate Ret =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(),
+ CondCmp->getOperand(0), CondConst, *PI, BB);
if (Ret != Baseline) break;
}
-
+
// If we terminated early, then one of the values didn't match.
if (PI == PE) {
unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0;
unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1;
- RemovePredecessorAndSimplify(CondBr->getSuccessor(ToRemove), BB, TD);
+ CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
CondBr->eraseFromParent();
return true;
@@ -755,174 +757,37 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
if (isa<Constant>(CondCmp->getOperand(1)))
SimplifyValue = CondCmp->getOperand(0);
-
+
// TODO: There are other places where load PRE would be profitable, such as
// more complex comparisons.
if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
if (SimplifyPartiallyRedundantLoad(LI))
return true;
-
-
+
+
// Handle a variety of cases where we are branching on something derived from
// a PHI node in the current block. If we can prove that any predecessors
// compute a predictable value based on a PHI node, thread those predecessors.
//
- if (ProcessThreadableEdges(CondInst, BB))
+ if (ProcessThreadableEdges(CondInst, BB, Preference))
return true;
-
+
// If this is an otherwise-unfoldable branch on a phi node in the current
// block, see if we can simplify.
if (PHINode *PN = dyn_cast<PHINode>(CondInst))
if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return ProcessBranchOnPHI(PN);
-
-
+
+
// If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
if (CondInst->getOpcode() == Instruction::Xor &&
CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
-
-
- // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
- // "(X == 4)", thread through this block.
-
- return false;
-}
-/// ProcessBranchOnDuplicateCond - We found a block and a predecessor of that
-/// block that jump on exactly the same condition. This means that we almost
-/// always know the direction of the edge in the DESTBB:
-/// PREDBB:
-/// br COND, DESTBB, BBY
-/// DESTBB:
-/// br COND, BBZ, BBW
-///
-/// If DESTBB has multiple predecessors, we can't just constant fold the branch
-/// in DESTBB, we have to thread over it.
-bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
- BasicBlock *BB) {
- BranchInst *PredBI = cast<BranchInst>(PredBB->getTerminator());
-
- // If both successors of PredBB go to DESTBB, we don't know anything. We can
- // fold the branch to an unconditional one, which allows other recursive
- // simplifications.
- bool BranchDir;
- if (PredBI->getSuccessor(1) != BB)
- BranchDir = true;
- else if (PredBI->getSuccessor(0) != BB)
- BranchDir = false;
- else {
- DEBUG(dbgs() << " In block '" << PredBB->getName()
- << "' folding terminator: " << *PredBB->getTerminator() << '\n');
- ++NumFolds;
- ConstantFoldTerminator(PredBB);
- return true;
- }
-
- BranchInst *DestBI = cast<BranchInst>(BB->getTerminator());
- // If the dest block has one predecessor, just fix the branch condition to a
- // constant and fold it.
- if (BB->getSinglePredecessor()) {
- DEBUG(dbgs() << " In block '" << BB->getName()
- << "' folding condition to '" << BranchDir << "': "
- << *BB->getTerminator() << '\n');
- ++NumFolds;
- Value *OldCond = DestBI->getCondition();
- DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
- BranchDir));
- // Delete dead instructions before we fold the branch. Folding the branch
- // can eliminate edges from the CFG which can end up deleting OldCond.
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
- ConstantFoldTerminator(BB);
- return true;
- }
-
-
- // Next, figure out which successor we are threading to.
- BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir);
-
- SmallVector<BasicBlock*, 2> Preds;
- Preds.push_back(PredBB);
-
- // Ok, try to thread it!
- return ThreadEdge(BB, Preds, SuccBB);
-}
-
-/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that
-/// block that switch on exactly the same condition. This means that we almost
-/// always know the direction of the edge in the DESTBB:
-/// PREDBB:
-/// switch COND [... DESTBB, BBY ... ]
-/// DESTBB:
-/// switch COND [... BBZ, BBW ]
-///
-/// Optimizing switches like this is very important, because simplifycfg builds
-/// switches out of repeated 'if' conditions.
-bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
- BasicBlock *DestBB) {
- // Can't thread edge to self.
- if (PredBB == DestBB)
- return false;
-
- SwitchInst *PredSI = cast<SwitchInst>(PredBB->getTerminator());
- SwitchInst *DestSI = cast<SwitchInst>(DestBB->getTerminator());
-
- // There are a variety of optimizations that we can potentially do on these
- // blocks: we order them from most to least preferable.
-
- // If DESTBB *just* contains the switch, then we can forward edges from PREDBB
- // directly to their destination. This does not introduce *any* code size
- // growth. Skip debug info first.
- BasicBlock::iterator BBI = DestBB->begin();
- while (isa<DbgInfoIntrinsic>(BBI))
- BBI++;
-
- // FIXME: Thread if it just contains a PHI.
- if (isa<SwitchInst>(BBI)) {
- bool MadeChange = false;
- // Ignore the default edge for now.
- for (unsigned i = 1, e = DestSI->getNumSuccessors(); i != e; ++i) {
- ConstantInt *DestVal = DestSI->getCaseValue(i);
- BasicBlock *DestSucc = DestSI->getSuccessor(i);
-
- // Okay, DestSI has a case for 'DestVal' that goes to 'DestSucc'. See if
- // PredSI has an explicit case for it. If so, forward. If it is covered
- // by the default case, we can't update PredSI.
- unsigned PredCase = PredSI->findCaseValue(DestVal);
- if (PredCase == 0) continue;
-
- // If PredSI doesn't go to DestBB on this value, then it won't reach the
- // case on this condition.
- if (PredSI->getSuccessor(PredCase) != DestBB &&
- DestSI->getSuccessor(i) != DestBB)
- continue;
-
- // Do not forward this if it already goes to this destination, this would
- // be an infinite loop.
- if (PredSI->getSuccessor(PredCase) == DestSucc)
- continue;
-
- // Otherwise, we're safe to make the change. Make sure that the edge from
- // DestSI to DestSucc is not critical and has no PHI nodes.
- DEBUG(dbgs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI);
- DEBUG(dbgs() << "THROUGH: " << *DestSI);
+ // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
+ // "(X == 4)", thread through this block.
- // If the destination has PHI nodes, just split the edge for updating
- // simplicity.
- if (isa<PHINode>(DestSucc->begin()) && !DestSucc->getSinglePredecessor()){
- SplitCriticalEdge(DestSI, i, this);
- DestSucc = DestSI->getSuccessor(i);
- }
- FoldSingleEntryPHINodes(DestSucc);
- PredSI->setSuccessor(PredCase, DestSucc);
- MadeChange = true;
- }
-
- if (MadeChange)
- return true;
- }
-
return false;
}
@@ -934,13 +799,13 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Don't hack volatile loads.
if (LI->isVolatile()) return false;
-
+
// If the load is defined in a block with exactly one predecessor, it can't be
// partially redundant.
BasicBlock *LoadBB = LI->getParent();
if (LoadBB->getSinglePredecessor())
return false;
-
+
Value *LoadedPtr = LI->getOperand(0);
// If the loaded operand is defined in the LoadBB, it can't be available.
@@ -948,17 +813,17 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
if (PtrOp->getParent() == LoadBB)
return false;
-
+
// Scan a few instructions up from the load, to see if it is obviously live at
// the entry to its block.
BasicBlock::iterator BBIt = LI;
- if (Value *AvailableVal =
+ if (Value *AvailableVal =
FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
// If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
//cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
-
+
// If the returned value is the load itself, replace with an undef. This can
// only happen in dead loops.
if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
@@ -972,13 +837,13 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// might clobber its value.
if (BBIt != LoadBB->begin())
return false;
-
-
+
+
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
AvailablePredsTy AvailablePreds;
BasicBlock *OneUnavailablePred = 0;
-
+
// If we got here, the loaded value is transparent through to the start of the
// block. Check to see if it is available in any of the predecessor blocks.
for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
@@ -996,23 +861,23 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
OneUnavailablePred = PredBB;
continue;
}
-
+
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
}
-
+
// If the loaded value isn't available in any predecessor, it isn't partially
// redundant.
if (AvailablePreds.empty()) return false;
-
+
// Okay, the loaded value is available in at least one (and maybe all!)
// predecessors. If the value is unavailable in more than one unique
// predecessor, we want to insert a merge block for those common predecessors.
// This ensures that we only have to insert one reload, thus not increasing
// code size.
BasicBlock *UnavailablePred = 0;
-
+
// If there is exactly one predecessor where the value is unavailable, the
// already computed 'OneUnavailablePred' block is it. If it ends in an
// unconditional branch, we know that it isn't a critical edge.
@@ -1035,17 +900,17 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// If the predecessor is an indirect goto, we can't split the edge.
if (isa<IndirectBrInst>(P->getTerminator()))
return false;
-
+
if (!AvailablePredSet.count(P))
PredsToSplit.push_back(P);
}
-
+
// Split them out to their own block.
UnavailablePred =
SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
"thread-pre-split", this);
}
-
+
// If the value isn't available in all predecessors, then there will be
// exactly one where it isn't available. Insert a load on that edge and add
// it to the AvailablePreds list.
@@ -1057,35 +922,35 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
UnavailablePred->getTerminator());
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
-
+
// Now we know that each predecessor of this block has a value in
// AvailablePreds, sort them for efficient access as we're walking the preds.
array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
-
+
// Create a PHI node at the start of the block for the PRE'd load value.
PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin());
PN->takeName(LI);
-
+
// Insert new entries into the PHI for each predecessor. A single block may
// have multiple entries here.
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
++PI) {
BasicBlock *P = *PI;
- AvailablePredsTy::iterator I =
+ AvailablePredsTy::iterator I =
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
std::make_pair(P, (Value*)0));
-
+
assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
-
+
PN->addIncoming(I->second, I->first);
}
-
+
//cerr << "PRE: " << *LI << *PN << "\n";
-
+
LI->replaceAllUsesWith(PN);
LI->eraseFromParent();
-
+
return true;
}
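
The AvailablePreds handling above relies on array_pod_sort plus std::lower_bound with a (pred, null) probe to find each predecessor's entry while walking the PHI's incoming blocks. The same lookup pattern in miniature (plain std::sort and ints standing in for the pointer pairs):

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      // (predecessor id, available value id): stand-ins for the
      // (BasicBlock*, Value*) pairs in AvailablePreds.
      std::vector<std::pair<int, int>> AvailablePreds = {{3, 30}, {1, 10}, {2, 20}};
      std::sort(AvailablePreds.begin(), AvailablePreds.end());

      // Probe with the smallest possible second element so lower_bound lands
      // on the first entry for this key, mirroring make_pair(P, (Value*)0).
      auto I = std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
                                std::make_pair(2, 0));
      assert(I != AvailablePreds.end() && I->first == 2 && I->second == 20);
      return 0;
    }
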
@@ -1097,7 +962,7 @@ FindMostPopularDest(BasicBlock *BB,
const SmallVectorImpl<std::pair<BasicBlock*,
BasicBlock*> > &PredToDestList) {
assert(!PredToDestList.empty());
-
+
// Determine popularity. If there are multiple possible destinations, we
// explicitly choose to ignore 'undef' destinations. We prefer to thread
// blocks with known and real destinations to threading undef. We'll handle
@@ -1106,13 +971,13 @@ FindMostPopularDest(BasicBlock *BB,
for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
if (PredToDestList[i].second)
DestPopularity[PredToDestList[i].second]++;
-
+
// Find the most popular dest.
DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
BasicBlock *MostPopularDest = DPI->first;
unsigned Popularity = DPI->second;
SmallVector<BasicBlock*, 4> SamePopularity;
-
+
for (++DPI; DPI != DestPopularity.end(); ++DPI) {
// If the popularity of this entry isn't higher than the popularity we've
// seen so far, ignore it.
@@ -1126,10 +991,10 @@ FindMostPopularDest(BasicBlock *BB,
SamePopularity.clear();
MostPopularDest = DPI->first;
Popularity = DPI->second;
- }
+ }
}
-
- // Okay, now we know the most popular destination. If there is more than
+
+ // Okay, now we know the most popular destination. If there is more than one
// destination, we need to determine one. This is arbitrary, but we need
// to make a deterministic decision. Pick the first one that appears in the
// successor list.
@@ -1138,105 +1003,105 @@ FindMostPopularDest(BasicBlock *BB,
TerminatorInst *TI = BB->getTerminator();
for (unsigned i = 0; ; ++i) {
assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
-
+
if (std::find(SamePopularity.begin(), SamePopularity.end(),
TI->getSuccessor(i)) == SamePopularity.end())
continue;
-
+
MostPopularDest = TI->getSuccessor(i);
break;
}
}
-
+
// Okay, we have finally picked the most popular destination.
return MostPopularDest;
}
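
FindMostPopularDest's tie-break is worth noting: when several destinations are equally popular, it picks the first one appearing in the terminator's successor list rather than whatever map iteration order happens to produce, keeping the transform deterministic. A miniature version of count-then-break-ties-by-successor-order:

    #include <algorithm>
    #include <cassert>
    #include <map>
    #include <vector>

    int main() {
      std::vector<int> PredDests = {2, 1, 2, 1};  // stand-in dest ids per pred
      std::vector<int> SuccOrder = {1, 2};        // terminator successor order

      std::map<int, unsigned> Popularity;
      for (int D : PredDests)
        ++Popularity[D];

      unsigned Best = 0;
      for (auto &KV : Popularity)
        Best = std::max(Best, KV.second);

      // Pick the first successor that attains the maximum popularity.
      for (int S : SuccOrder)
        if (Popularity[S] == Best) {
          assert(S == 1);  // 1 and 2 tie at two votes; successor order wins
          return 0;
        }
      return 1;
    }
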
-bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference) {
// If threading this would thread across a loop header, don't even try to
// thread the edge.
if (LoopHeaders.count(BB))
return false;
-
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
- if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
+
+ PredValueInfoTy PredValues;
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference))
return false;
-
+
assert(!PredValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
DEBUG(dbgs() << "IN BB: " << *BB;
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
- dbgs() << " BB '" << BB->getName() << "': FOUND condition = ";
- if (PredValues[i].first)
- dbgs() << *PredValues[i].first;
- else
- dbgs() << "UNDEF";
- dbgs() << " for pred '" << PredValues[i].second->getName()
- << "'.\n";
+ dbgs() << " BB '" << BB->getName() << "': FOUND condition = "
+ << *PredValues[i].first
+ << " for pred '" << PredValues[i].second->getName() << "'.\n";
});
-
+
// Decide what we want to thread through. Convert our list of known values to
// a list of known destinations for each pred. This also discards duplicate
// predecessors and keeps track of the undefined inputs (which are represented
// as a null dest in the PredToDestList).
SmallPtrSet<BasicBlock*, 16> SeenPreds;
SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
-
+
BasicBlock *OnlyDest = 0;
BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
-
+
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
BasicBlock *Pred = PredValues[i].second;
if (!SeenPreds.insert(Pred))
continue; // Duplicate predecessor entry.
-
+
// If the predecessor ends with an indirect goto, we can't change its
// destination.
if (isa<IndirectBrInst>(Pred->getTerminator()))
continue;
-
- ConstantInt *Val = PredValues[i].first;
-
+
+ Constant *Val = PredValues[i].first;
+
BasicBlock *DestBB;
- if (Val == 0) // Undef.
+ if (isa<UndefValue>(Val))
DestBB = 0;
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- DestBB = BI->getSuccessor(Val->isZero());
+ DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+ DestBB = SI->getSuccessor(SI->findCaseValue(cast<ConstantInt>(Val)));
else {
- SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
- DestBB = SI->getSuccessor(SI->findCaseValue(Val));
+ assert(isa<IndirectBrInst>(BB->getTerminator())
+ && "Unexpected terminator");
+ DestBB = cast<BlockAddress>(Val)->getBasicBlock();
}
// If we have exactly one destination, remember it for efficiency below.
- if (i == 0)
+ if (PredToDestList.empty())
OnlyDest = DestBB;
else if (OnlyDest != DestBB)
OnlyDest = MultipleDestSentinel;
-
+
PredToDestList.push_back(std::make_pair(Pred, DestBB));
}
-
+
// If all edges were unthreadable, we fail.
if (PredToDestList.empty())
return false;
-
+
// Determine which is the most common successor. If we have many inputs and
// this block is a switch, we want to thread the batch that goes to the
// most popular destination first. If we only know about one
// threadable destination (the common case) we can avoid this.
BasicBlock *MostPopularDest = OnlyDest;
-
+
if (MostPopularDest == MultipleDestSentinel)
MostPopularDest = FindMostPopularDest(BB, PredToDestList);
-
+
// Now that we know what the most popular destination is, factor all
// predecessors that will jump to it into a single predecessor.
SmallVector<BasicBlock*, 16> PredsToFactor;
for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
if (PredToDestList[i].second == MostPopularDest) {
BasicBlock *Pred = PredToDestList[i].first;
-
+
// This predecessor may be a switch or something else that has multiple
// edges to the block. Factor each of these edges by adding one entry
// per occurrence to PredsToFactor.
@@ -1251,7 +1116,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
if (MostPopularDest == 0)
MostPopularDest = BB->getTerminator()->
getSuccessor(GetBestDestForJumpOnUndef(BB));
-
+
// Ok, try to thread it!
return ThreadEdge(BB, PredsToFactor, MostPopularDest);
}
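
The destination computation above is a small case analysis on the terminator. For the conditional-branch case, successor 0 is the true target and successor 1 the false target, which is why the code indexes with isZero(). A compressed standalone sketch of just that case:

struct Block { Block *Succ[2]; };   // stand-in for a two-way branch

// For "br i1 %c, label %T, label %F": a known-true condition jumps to
// successor 0, a known-false one to successor 1, mirroring
// BI->getSuccessor(cast<ConstantInt>(Val)->isZero()) above.
Block *destForKnownCond(Block *BB, bool CondIsZero) {
  return BB->Succ[CondIsZero ? 1 : 0];
}
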
@@ -1259,15 +1124,15 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
/// a PHI node in the current block. See if there are any simplifications we
/// can do based on inputs to the phi node.
-///
+///
bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
BasicBlock *BB = PN->getParent();
-
+
// TODO: We could make use of this to do it once for blocks with common PHI
// values.
SmallVector<BasicBlock*, 1> PredBBs;
PredBBs.resize(1);
-
+
// If any of the predecessor blocks end in an unconditional branch, we can
// *duplicate* the conditional branch into that block in order to further
// encourage jump threading and to eliminate cases where we have branch on a
@@ -1289,21 +1154,21 @@ bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
/// a xor instruction in the current block. See if there are any
/// simplifications we can do based on inputs to the xor.
-///
+///
bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
BasicBlock *BB = BO->getParent();
-
+
// If either the LHS or RHS of the xor is a constant, don't do this
// optimization.
if (isa<ConstantInt>(BO->getOperand(0)) ||
isa<ConstantInt>(BO->getOperand(1)))
return false;
-
+
// If the first instruction in BB isn't a phi, we won't be able to infer
// anything special about any particular predecessor.
if (!isa<PHINode>(BB->front()))
return false;
-
+
// If we have a xor as the branch input to this block, and we know that the
// LHS or RHS of the xor in any predecessor is true/false, then we can clone
// the condition into the predecessor and fix that value to true, saving some
@@ -1322,15 +1187,17 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// %Y = icmp ne i32 %A, %B
// br i1 %Z, ...
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> XorOpValues;
+ PredValueInfoTy XorOpValues;
bool isLHS = true;
- if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) {
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
+ WantInteger)) {
assert(XorOpValues.empty());
- if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues))
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
+ WantInteger))
return false;
isLHS = false;
}
-
+
assert(!XorOpValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
@@ -1338,29 +1205,33 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// predecessors can be of the set true, false, or undef.
unsigned NumTrue = 0, NumFalse = 0;
for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
- if (!XorOpValues[i].first) continue; // Ignore undefs for the count.
- if (XorOpValues[i].first->isZero())
+ if (isa<UndefValue>(XorOpValues[i].first))
+ // Ignore undefs for the count.
+ continue;
+ if (cast<ConstantInt>(XorOpValues[i].first)->isZero())
++NumFalse;
else
++NumTrue;
}
-
+
// Determine which value to split on, true, false, or undef if neither.
ConstantInt *SplitVal = 0;
if (NumTrue > NumFalse)
SplitVal = ConstantInt::getTrue(BB->getContext());
else if (NumTrue != 0 || NumFalse != 0)
SplitVal = ConstantInt::getFalse(BB->getContext());
-
+
// Collect all of the blocks that this can be folded into so that we can
// factor this once and clone it once.
SmallVector<BasicBlock*, 8> BlocksToFoldInto;
for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
- if (XorOpValues[i].first != SplitVal && XorOpValues[i].first != 0) continue;
+ if (XorOpValues[i].first != SplitVal &&
+ !isa<UndefValue>(XorOpValues[i].first))
+ continue;
BlocksToFoldInto.push_back(XorOpValues[i].second);
}
-
+
// If we inferred a value for all of the predecessors, then duplication won't
// help us. However, we can just replace the LHS or RHS with the constant.
if (BlocksToFoldInto.size() ==
@@ -1377,10 +1248,10 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// If all preds provide 1, set the computed value to 1.
BO->setOperand(!isLHS, SplitVal);
}
-
+
return true;
}
-
+
// Try to duplicate BB into PredBB.
return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
}
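
A standalone sketch of the split-value choice: tally the known true/false inputs while skipping undefs, split on the majority, and fall back to "no value" only when everything is undef (std::optional<bool> is this sketch's stand-in for ConstantInt-or-undef):

#include <optional>
#include <vector>

// One known xor-operand value per predecessor: true, false, or
// std::nullopt standing in for undef.
std::optional<bool>
chooseSplitVal(const std::vector<std::optional<bool> > &Inputs) {
  unsigned NumTrue = 0, NumFalse = 0;
  for (unsigned i = 0, e = Inputs.size(); i != e; ++i) {
    if (!Inputs[i]) continue;        // ignore undefs for the count
    if (*Inputs[i]) ++NumTrue; else ++NumFalse;
  }
  if (NumTrue > NumFalse) return true;
  if (NumTrue != 0 || NumFalse != 0) return false;
  return std::nullopt;               // every input was undef
}
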
@@ -1398,14 +1269,14 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
// Ok, we have a PHI node. Figure out what the incoming value was for the
// DestBlock.
Value *IV = PN->getIncomingValueForBlock(OldPred);
-
+
// Remap the value if necessary.
if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
if (I != ValueMap.end())
IV = I->second;
}
-
+
PN->addIncoming(IV, NewPred);
}
}
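
The remap step above is the standard clone-map lookup; anything that was not cloned flows through unchanged. A minimal sketch (Val is a stand-in type):

#include <map>

struct Val;

// ValueMap records original -> clone. Values defined outside the cloned
// block are not in the map and pass through unchanged.
Val *remap(Val *V, const std::map<Val*, Val*> &ValueMap) {
  std::map<Val*, Val*>::const_iterator I = ValueMap.find(V);
  return I != ValueMap.end() ? I->second : V;
}
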
@@ -1413,8 +1284,8 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
/// ThreadEdge - We have decided that it is safe and profitable to factor the
/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
/// across BB. Transform the IR to reflect this change.
-bool JumpThreading::ThreadEdge(BasicBlock *BB,
- const SmallVectorImpl<BasicBlock*> &PredBBs,
+bool JumpThreading::ThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would loop forever.
if (SuccBB == BB) {
@@ -1422,7 +1293,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
<< "' - would thread to self!\n");
return false;
}
-
+
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
@@ -1438,7 +1309,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
<< "' - Cost is too high: " << JumpThreadCost << "\n");
return false;
}
-
+
// And finally, do it! Start by factoring the predecessors if needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
@@ -1449,30 +1320,29 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
".thr_comm", this);
}
-
+
// And finally, do it!
DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '"
<< SuccBB->getName() << "' with cost: " << JumpThreadCost
<< ", across block:\n "
<< *BB << "\n");
-
- if (LVI)
- LVI->threadEdge(PredBB, BB, SuccBB);
-
+
+ LVI->threadEdge(PredBB, BB, SuccBB);
+
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
DenseMap<Instruction*, Value*> ValueMapping;
-
- BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
- BB->getName()+".thread",
+
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
+ BB->getName()+".thread",
BB->getParent(), BB);
NewBB->moveAfter(PredBB);
-
+
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
-
+
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; !isa<TerminatorInst>(BI); ++BI) {
@@ -1480,7 +1350,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
New->setName(BI->getName());
NewBB->getInstList().push_back(New);
ValueMapping[BI] = New;
-
+
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
@@ -1489,15 +1359,15 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
New->setOperand(i, I->second);
}
}
-
+
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
BranchInst::Create(SuccBB, NewBB);
-
+
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
// PHI nodes for NewBB now.
AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
-
+
// If there were values defined in BB that are used outside the block, then we
// now have to update all uses of the value to use either the original value,
// the cloned value, or some PHI derived value. This can require arbitrary
@@ -1515,14 +1385,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
continue;
} else if (User->getParent() == BB)
continue;
-
+
UsesToRename.push_back(&UI.getUse());
}
-
+
// If there are no uses outside the block, we're done with this instruction.
if (UsesToRename.empty())
continue;
-
+
DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
@@ -1531,28 +1401,28 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
SSAUpdate.Initialize(I->getType(), I->getName());
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
-
+
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
DEBUG(dbgs() << "\n");
}
-
-
+
+
// Ok, NewBB is good to go. Update the terminator of PredBB to jump to
// NewBB instead of BB. This eliminates predecessors from BB, which requires
// us to simplify any PHI nodes in BB.
TerminatorInst *PredTerm = PredBB->getTerminator();
for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
if (PredTerm->getSuccessor(i) == BB) {
- RemovePredecessorAndSimplify(BB, PredBB, TD);
+ BB->removePredecessor(PredBB, true);
PredTerm->setSuccessor(i, NewBB);
}
-
+
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
SimplifyInstructionsInBlock(NewBB, TD);
-
+
// Threaded an edge!
++NumThreads;
return true;
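
The clone-and-remap loop at the heart of ThreadEdge, modeled standalone: because definitions precede uses within a block, each cloned instruction's intra-block operands are already in the map by the time they are patched (Inst is a hypothetical stand-in):

#include <map>
#include <vector>

struct Inst {
  std::vector<Inst*> Ops;
  Inst *clone() const { return new Inst(*this); }
};

// Clone Body in order into NewBody, rewriting intra-block references to
// point at the clones rather than the originals.
void cloneBlock(const std::vector<Inst*> &Body, std::vector<Inst*> &NewBody) {
  std::map<Inst*, Inst*> ValueMapping;
  for (unsigned i = 0, e = Body.size(); i != e; ++i) {
    Inst *New = Body[i]->clone();
    NewBody.push_back(New);
    ValueMapping[Body[i]] = New;
    for (unsigned op = 0, oe = New->Ops.size(); op != oe; ++op) {
      std::map<Inst*, Inst*>::iterator It = ValueMapping.find(New->Ops[op]);
      if (It != ValueMapping.end())    // patch up intra-block references
        New->Ops[op] = It->second;
    }
  }
}
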
@@ -1576,14 +1446,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
<< "' - it might create an irreducible loop!\n");
return false;
}
-
+
unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
if (DuplicationCost > Threshold) {
DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
return false;
}
-
+
// And finally, do it! Start by factoring the predecessors if needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
@@ -1594,35 +1464,35 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
".thr_comm", this);
}
-
+
// Okay, we decided to do this! Clone all the instructions in BB onto the end
// of PredBB.
DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
<< PredBB->getName() << "' to eliminate branch on phi. Cost: "
<< DuplicationCost << " block is:" << *BB << "\n");
-
+
// Unless PredBB ends with an unconditional branch, split the edge so that we
// can just clone the bits from BB into the end of the new PredBB.
BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
-
+
if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
PredBB = SplitEdge(PredBB, BB, this);
OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
}
-
+
// We are going to have to map operands from the original BB block into the
// PredBB block. Evaluate PHI nodes in BB.
DenseMap<Instruction*, Value*> ValueMapping;
-
+
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
-
+
// Clone the non-phi instructions of BB into PredBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; BI != BB->end(); ++BI) {
Instruction *New = BI->clone();
-
+
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
@@ -1644,7 +1514,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
ValueMapping[BI] = New;
}
}
-
+
// Check to see if the targets of the branch had PHI nodes. If so, we need to
// add entries to the PHI nodes for branch from PredBB now.
BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
@@ -1652,7 +1522,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
ValueMapping);
AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
ValueMapping);
-
+
// If there were values defined in BB that are used outside the block, then we
// now have to update all uses of the value to use either the original value,
// the cloned value, or some PHI derived value. This can require arbitrary
@@ -1670,35 +1540,35 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
continue;
} else if (User->getParent() == BB)
continue;
-
+
UsesToRename.push_back(&UI.getUse());
}
-
+
// If there are no uses outside the block, we're done with this instruction.
if (UsesToRename.empty())
continue;
-
+
DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
-
+
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
SSAUpdate.Initialize(I->getType(), I->getName());
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
-
+
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
DEBUG(dbgs() << "\n");
}
-
+
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
// that we nuked.
- RemovePredecessorAndSimplify(BB, PredBB, TD);
-
+ BB->removePredecessor(PredBB, true);
+
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
-
+
++NumDupes;
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 2ef8544..0786793 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -36,13 +36,13 @@
#include "llvm/DerivedTypes.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CFG.h"
@@ -66,7 +66,9 @@ DisablePromotion("disable-licm-promotion", cl::Hidden,
namespace {
struct LICM : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LICM() : LoopPass(ID) {}
+ LICM() : LoopPass(ID) {
+ initializeLICMPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -80,7 +82,7 @@ namespace {
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved("scalar-evolution");
AU.addPreservedID(LoopSimplifyID);
}
@@ -129,42 +131,7 @@ namespace {
///
bool inSubLoop(BasicBlock *BB) {
assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
- for (Loop::iterator I = CurLoop->begin(), E = CurLoop->end(); I != E; ++I)
- if ((*I)->contains(BB))
- return true; // A subloop actually contains this block!
- return false;
- }
-
- /// isExitBlockDominatedByBlockInLoop - This method checks to see if the
- /// specified exit block of the loop is dominated by the specified block
- /// that is in the body of the loop. We use these constraints to
- /// dramatically limit the amount of the dominator tree that needs to be
- /// searched.
- bool isExitBlockDominatedByBlockInLoop(BasicBlock *ExitBlock,
- BasicBlock *BlockInLoop) const {
- // If the block in the loop is the loop header, it must be dominated!
- BasicBlock *LoopHeader = CurLoop->getHeader();
- if (BlockInLoop == LoopHeader)
- return true;
-
- DomTreeNode *BlockInLoopNode = DT->getNode(BlockInLoop);
- DomTreeNode *IDom = DT->getNode(ExitBlock);
-
- // Because the exit block is not in the loop, we know we have to get _at
- // least_ its immediate dominator.
- IDom = IDom->getIDom();
-
- while (IDom && IDom != BlockInLoopNode) {
- // If we have got to the header of the loop, then the instructions block
- // did not dominate the exit node, so we can't hoist it.
- if (IDom->getBlock() == LoopHeader)
- return false;
-
- // Get next Immediate Dominator.
- IDom = IDom->getIDom();
- };
-
- return true;
+ return LI->getLoopFor(BB) != CurLoop;
}
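
The one-liner works because LoopInfo maps each block to its innermost containing loop; a block of CurLoop sits in a subloop exactly when that innermost loop differs from CurLoop. A sketch of the same query against a plain map:

#include <map>

struct Loop; struct Block;

// LoopFor models LoopInfo's block -> innermost-loop map. BB is assumed
// to be contained in CurLoop, as the assertion above guarantees.
bool inSubLoop(Block *BB, Loop *CurLoop,
               const std::map<Block*, Loop*> &LoopFor) {
  return LoopFor.at(BB) != CurLoop;
}
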
/// sink - When an instruction is found to only be used outside of the loop,
@@ -187,13 +154,13 @@ namespace {
/// pointerInvalidatedByLoop - Return true if the body of this loop may
/// store into the memory location pointed to by V.
///
- bool pointerInvalidatedByLoop(Value *V, unsigned Size) {
+ bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
+ const MDNode *TBAAInfo) {
// Check to see if any of the basic blocks in CurLoop invalidate *V.
- return CurAST->getAliasSetForPointer(V, Size).isMod();
+ return CurAST->getAliasSetForPointer(V, Size, TBAAInfo).isMod();
}
bool canSinkOrHoistInst(Instruction &I);
- bool isLoopInvariantInst(Instruction &I);
bool isNotUsedInLoop(Instruction &I);
void PromoteAliasSet(AliasSet &AS);
@@ -201,7 +168,12 @@ namespace {
}
char LICM::ID = 0;
-INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false);
+INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
Pass *llvm::createLICMPass() { return new LICM(); }
@@ -369,7 +341,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.
//
- if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) &&
+ if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) &&
isSafeToExecuteUnconditionally(I))
hoist(I);
}
@@ -394,16 +366,17 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
return true;
// Don't hoist loads which have may-aliased stores in loop.
- unsigned Size = 0;
+ uint64_t Size = 0;
if (LI->getType()->isSized())
Size = AA->getTypeStoreSize(LI->getType());
- return !pointerInvalidatedByLoop(LI->getOperand(0), Size);
+ return !pointerInvalidatedByLoop(LI->getOperand(0), Size,
+ LI->getMetadata(LLVMContext::MD_tbaa));
} else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
// Handle obvious cases efficiently.
AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
if (Behavior == AliasAnalysis::DoesNotAccessMemory)
return true;
- else if (Behavior == AliasAnalysis::OnlyReadsMemory) {
+ if (AliasAnalysis::onlyReadsMemory(Behavior)) {
// If this call only reads from memory and there are no writes to memory
// in the loop, we can hoist or sink the call as appropriate.
bool FoundMod = false;
@@ -452,20 +425,6 @@ bool LICM::isNotUsedInLoop(Instruction &I) {
}
-/// isLoopInvariantInst - Return true if all operands of this instruction are
-/// loop invariant. We also filter out non-hoistable instructions here just for
-/// efficiency.
-///
-bool LICM::isLoopInvariantInst(Instruction &I) {
- // The instruction is loop invariant if all of its operands are loop-invariant
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- if (!CurLoop->isLoopInvariant(I.getOperand(i)))
- return false;
-
- // If we got this far, the instruction is loop invariant!
- return true;
-}
-
/// sink - When an instruction is found to only be used outside of the loop,
/// this function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its
@@ -486,7 +445,7 @@ void LICM::sink(Instruction &I) {
// enough that we handle it as a special (more efficient) case: there are
// no PHI nodes that need to be placed.
if (ExitBlocks.size() == 1) {
- if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) {
+ if (!DT->dominates(I.getParent(), ExitBlocks[0])) {
// Instruction is not used, just delete it.
CurAST->deleteValue(&I);
// If I has users in unreachable blocks, eliminate.
@@ -537,7 +496,7 @@ void LICM::sink(Instruction &I) {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = ExitBlocks[i];
- if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB))
+ if (!DT->dominates(InstOrigBB, ExitBlock))
continue;
// Insert the code after the last PHI node.
@@ -628,15 +587,61 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
SmallVector<BasicBlock*, 8> ExitBlocks;
CurLoop->getExitBlocks(ExitBlocks);
- // For each exit block, get the DT node and walk up the DT until the
- // instruction's basic block is found or we exit the loop.
+ // Verify that the block dominates each of the exit blocks of the loop.
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[i], Inst.getParent()))
+ if (!DT->dominates(Inst.getParent(), ExitBlocks[i]))
return false;
return true;
}
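
The same dominance test, factored out standalone: an instruction can be executed unconditionally (hoisted into the preheader) only if its block dominates every exit, so no path can leave the loop around it. Dominates stands in for DominatorTree::dominates:

#include <vector>

struct Block;

template <class DomFn>
bool dominatesAllExits(Block *BB, const std::vector<Block*> &ExitBlocks,
                       DomFn Dominates) {
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
    if (!Dominates(BB, ExitBlocks[i]))
      return false;   // some path exits the loop without reaching BB
  return true;
}
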
+namespace {
+ class LoopPromoter : public LoadAndStorePromoter {
+ Value *SomePtr; // Designated pointer to store to.
+ SmallPtrSet<Value*, 4> &PointerMustAliases;
+ SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
+ AliasSetTracker &AST;
+ public:
+ LoopPromoter(Value *SP,
+ const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ SmallPtrSet<Value*, 4> &PMA,
+ SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast)
+ : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
+ LoopExitBlocks(LEB), AST(ast) {}
+
+ virtual bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &) const {
+ Value *Ptr;
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ Ptr = LI->getOperand(0);
+ else
+ Ptr = cast<StoreInst>(I)->getPointerOperand();
+ return PointerMustAliases.count(Ptr);
+ }
+
+ virtual void doExtraRewritesBeforeFinalDeletion() const {
+ // Insert stores in the loop exit blocks. Each exit block gets a
+ // store of the live-out values that feed them. Since we've already told
+ // the SSA updater about the defs in the loop and the preheader
+ // definition, it is all set and we can start using it.
+ for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = LoopExitBlocks[i];
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+ new StoreInst(LiveInValue, SomePtr, InsertPos);
+ }
+ }
+
+ virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const {
+ // Update alias analysis.
+ AST.copyValue(LI, V);
+ }
+ virtual void instructionDeleted(Instruction *I) const {
+ AST.deleteValue(I);
+ }
+ };
+} // end anon namespace
+
/// PromoteAliasSet - Try to promote memory values to scalars by sinking
/// stores out of the loop and moving loads to before the loop. We do this by
/// looping over the stores in the loop, looking for stores to Must pointers
@@ -697,8 +702,11 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
if (isa<LoadInst>(Use))
assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
else if (isa<StoreInst>(Use)) {
+ // Stores *of* the pointer are not interesting, only stores *to* the
+ // pointer.
+ if (Use->getOperand(1) != ASIV)
+ continue;
assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
- if (Use->getOperand(0) == ASIV) return;
} else
return; // Not a load or store.
@@ -718,179 +726,43 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
Changed = true;
++NumPromoted;
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
// We use the SSAUpdater interface to insert phi nodes as required.
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
+ LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
+ *CurAST);
- // It wants to know some value of the same type as what we'll be inserting.
- Value *SomeValue;
- if (isa<LoadInst>(LoopUses[0]))
- SomeValue = LoopUses[0];
- else
- SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
- SSA.Initialize(SomeValue->getType(), SomeValue->getName());
-
- // First step: bucket up uses of the pointers by the block they occur in.
- // This is important because we have to handle multiple defs/uses in a block
- // ourselves: SSAUpdater is purely for cross-block references.
- // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
- DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
- for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
- Instruction *User = LoopUses[i];
- UsesByBlock[User->getParent()].push_back(User);
- }
-
- // Okay, now we can iterate over all the blocks in the loop with uses,
- // processing them. Keep track of which loads are loading a live-in value.
- SmallVector<LoadInst*, 32> LiveInLoads;
- DenseMap<Value*, Value*> ReplacedLoads;
-
- for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
- Instruction *User = LoopUses[LoopUse];
- std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];
-
- // If this block has already been processed, ignore this repeat use.
- if (BlockUses.empty()) continue;
-
- // Okay, this is the first use in the block. If this block just has a
- // single user in it, we can rewrite it trivially.
- if (BlockUses.size() == 1) {
- // If it is a store, it is a trivial def of the value in the block.
- if (isa<StoreInst>(User)) {
- SSA.AddAvailableValue(User->getParent(),
- cast<StoreInst>(User)->getOperand(0));
- } else {
- // Otherwise it is a load, queue it to rewrite as a live-in load.
- LiveInLoads.push_back(cast<LoadInst>(User));
- }
- BlockUses.clear();
- continue;
- }
-
- // Otherwise, check to see if this block is all loads. If so, we can queue
- // them all as live in loads.
- bool HasStore = false;
- for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
- if (isa<StoreInst>(BlockUses[i])) {
- HasStore = true;
- break;
- }
- }
-
- if (!HasStore) {
- for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
- LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
- BlockUses.clear();
- continue;
- }
-
- // Otherwise, we have mixed loads and stores (or just a bunch of stores).
- // Since SSAUpdater is purely for cross-block values, we need to determine
- // the order of these instructions in the block. If the first use in the
- // block is a load, then it uses the live in value. The last store defines
- // the live out value. We handle this by doing a linear scan of the block.
- BasicBlock *BB = User->getParent();
- Value *StoredValue = 0;
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- if (LoadInst *L = dyn_cast<LoadInst>(II)) {
- // If this is a load from an unrelated pointer, ignore it.
- if (!PointerMustAliases.count(L->getOperand(0))) continue;
-
- // If we haven't seen a store yet, this is a live in use, otherwise
- // use the stored value.
- if (StoredValue) {
- L->replaceAllUsesWith(StoredValue);
- ReplacedLoads[L] = StoredValue;
- } else {
- LiveInLoads.push_back(L);
- }
- continue;
- }
-
- if (StoreInst *S = dyn_cast<StoreInst>(II)) {
- // If this is a store to an unrelated pointer, ignore it.
- if (!PointerMustAliases.count(S->getOperand(1))) continue;
-
- // Remember that this is the active value in the block.
- StoredValue = S->getOperand(0);
- }
- }
-
- // The last stored value that happened is the live-out for the block.
- assert(StoredValue && "Already checked that there is a store in block");
- SSA.AddAvailableValue(BB, StoredValue);
- BlockUses.clear();
- }
-
- // Now that all the intra-loop values are classified, set up the preheader.
- // It gets a load of the pointer we're promoting, and it is the live-out value
- // from the preheader.
- LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted",
- Preheader->getTerminator());
+ // Set up the preheader to have a definition of the value. It is the live-out
+ // value from the preheader that uses inside the loop will see.
+ LoadInst *PreheaderLoad =
+ new LoadInst(SomePtr, SomePtr->getName()+".promoted",
+ Preheader->getTerminator());
SSA.AddAvailableValue(Preheader, PreheaderLoad);
- // Now that the preheader is good to go, set up the exit blocks. Each exit
- // block gets a store of the live-out values that feed them. Since we've
- // already told the SSA updater about the defs in the loop and the preheader
- // definition, it is all set and we can start using it.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getUniqueExitBlocks(ExitBlocks);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- BasicBlock *ExitBlock = ExitBlocks[i];
- Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
- Instruction *InsertPos = ExitBlock->getFirstNonPHI();
- new StoreInst(LiveInValue, SomePtr, InsertPos);
+ // Copy any value stored to or loaded from a must-alias of the pointer.
+ if (PreheaderLoad->getType()->isPointerTy()) {
+ Value *SomeValue;
+ if (LoadInst *LI = dyn_cast<LoadInst>(LoopUses[0]))
+ SomeValue = LI;
+ else
+ SomeValue = cast<StoreInst>(LoopUses[0])->getValueOperand();
+
+ CurAST->copyValue(SomeValue, PreheaderLoad);
}
- // Okay, now we rewrite all loads that use live-in values in the loop,
- // inserting PHI nodes as necessary.
- for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
- LoadInst *ALoad = LiveInLoads[i];
- Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
- ALoad->replaceAllUsesWith(NewVal);
- CurAST->copyValue(ALoad, NewVal);
- ReplacedLoads[ALoad] = NewVal;
- }
+ // Rewrite all the loads in the loop and remember all the definitions from
+ // stores in the loop.
+ Promoter.run(LoopUses);
// If the preheader load is itself a pointer, we need to tell alias analysis
// about the new pointer we created in the preheader block and about any PHI
// nodes that just got inserted.
if (PreheaderLoad->getType()->isPointerTy()) {
- // Copy any value stored to or loaded from a must-alias of the pointer.
- CurAST->copyValue(SomeValue, PreheaderLoad);
-
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
- CurAST->copyValue(SomeValue, NewPHIs[i]);
- }
-
- // Now that everything is rewritten, delete the old instructions from the body
- // of the loop. They should all be dead now.
- for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
- Instruction *User = LoopUses[i];
-
- // If this is a load that still has uses, then the load must have been added
- // as a live value in the SSAUpdate data structure for a block (e.g. because
- // the loaded value was stored later). In this case, we need to recursively
- // propagate the updates until we get to the real value.
- if (!User->use_empty()) {
- Value *NewVal = ReplacedLoads[User];
- assert(NewVal && "not a replaced load?");
-
- // Propagate down to the ultimate replacee. The intermediately loads
- // could theoretically already have been deleted, so we don't want to
- // dereference the Value*'s.
- DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
- while (RLI != ReplacedLoads.end()) {
- NewVal = RLI->second;
- RLI = ReplacedLoads.find(NewVal);
- }
-
- User->replaceAllUsesWith(NewVal);
- CurAST->copyValue(User, NewVal);
- }
-
- CurAST->deleteValue(User);
- User->eraseFromParent();
+ CurAST->copyValue(PreheaderLoad, NewPHIs[i]);
}
// whew, we're done!
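
What the promotion above amounts to at the source level, as a hypothetical before/after on plain C-style code: in-loop loads and stores through a must-alias pointer become uses of a scalar, with one load in the preheader and one store per exit block:

// Before: *p is re-loaded and re-stored on every iteration.
void before(int *p, int n) {
  for (int i = 0; i < n; ++i)
    *p += i;
}

// After promotion: one load in the preheader (".promoted"), the loop runs
// on a register, and the single exit block gets one store back.
void after(int *p, int n) {
  int t = *p;
  for (int i = 0; i < n; ++i)
    t += i;
  *p = t;
}
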
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index 543dfc1..6d1d344 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -17,6 +17,7 @@
#define DEBUG_TYPE "loop-delete"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallVector.h"
@@ -28,7 +29,9 @@ namespace {
class LoopDeletion : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopDeletion() : LoopPass(ID) {}
+ LoopDeletion() : LoopPass(ID) {
+ initializeLoopDeletionPass(*PassRegistry::getPassRegistry());
+ }
// Possibly eliminate loop L if it is dead.
bool runOnLoop(Loop* L, LPPassManager& LPM);
@@ -49,14 +52,20 @@ namespace {
AU.addPreserved<LoopInfo>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- AU.addPreserved<DominanceFrontier>();
}
};
}
char LoopDeletion::ID = 0;
-INITIALIZE_PASS(LoopDeletion, "loop-deletion",
- "Delete dead loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false)
Pass* llvm::createLoopDeletionPass() {
return new LoopDeletion();
@@ -183,22 +192,19 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Update the dominator tree and remove the instructions and blocks that will
// be deleted from the reference counting scheme.
DominatorTree& DT = getAnalysis<DominatorTree>();
- DominanceFrontier* DF = getAnalysisIfAvailable<DominanceFrontier>();
- SmallPtrSet<DomTreeNode*, 8> ChildNodes;
+ SmallVector<DomTreeNode*, 8> ChildNodes;
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
LI != LE; ++LI) {
// Move all of the block's children to be children of the preheader, which
// allows us to remove the domtree entry for the block.
- ChildNodes.insert(DT[*LI]->begin(), DT[*LI]->end());
- for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
+ ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
+ for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
DE = ChildNodes.end(); DI != DE; ++DI) {
DT.changeImmediateDominator(*DI, DT[preheader]);
- if (DF) DF->changeImmediateDominator((*DI)->getBlock(), preheader, &DT);
}
ChildNodes.clear();
DT.eraseNode(*LI);
- if (DF) DF->removeBlock(*LI);
// Remove the block from the reference counting scheme, so that we can
// delete it freely later.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
new file mode 100644
index 0000000..d7fa149
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -0,0 +1,594 @@
+//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements an idiom recognizer that transforms simple loops into a
+// non-loop form. In cases where this kicks in, it can be a significant
+// performance win.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO List:
+//
+// Future loop memory idioms to recognize:
+// memcmp, memmove, strlen, etc.
+// Future floating point idioms to recognize in -ffast-math mode:
+// fpowi
+// Future integer operation idioms to recognize:
+// ctpop, ctlz, cttz
+//
+// Beware that isel's default lowering for ctpop is highly inefficient for
+// i64 and larger types when i64 is legal and the value has few bits set. It
+// would be good to enhance isel to emit a loop for ctpop in this case.
+//
+// We should enhance the memset/memcpy recognition to handle multiple stores in
+// the loop. This would handle things like:
+// void foo(_Complex float *P)
+// for (i) { __real__(*P) = 0; __imag__(*P) = 0; }
+//
+// This could recognize common matrix multiplies and dot product idioms and
+// replace them with calls to BLAS (if linked in??).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-idiom"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
+STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
+
+namespace {
+ class LoopIdiomRecognize : public LoopPass {
+ Loop *CurLoop;
+ const TargetData *TD;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ TargetLibraryInfo *TLI;
+ public:
+ static char ID;
+ explicit LoopIdiomRecognize() : LoopPass(ID) {
+ initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+ bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+ bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
+
+ bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment,
+ Value *SplatValue, Instruction *TheStore,
+ const SCEVAddRecExpr *Ev,
+ const SCEV *BECount);
+ bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv,
+ const SCEV *BECount);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
+ }
+ };
+}
+
+char LoopIdiomRecognize::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+ false, false)
+
+Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
+
+/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
+ SmallVector<Instruction*, 32> NowDeadInsts;
+
+ NowDeadInsts.push_back(I);
+
+ // Before we touch this instruction, remove it from SE!
+ do {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
+
+ // This instruction is dead, zap it, in stages. Start by removing it from
+ // SCEV.
+ SE.forgetValue(DeadInst);
+
+ for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+ Value *Op = DeadInst->getOperand(op);
+ DeadInst->setOperand(op, 0);
+
+ // If this operand just became dead, add it to the NowDeadInsts list.
+ if (!Op->use_empty()) continue;
+
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ if (isInstructionTriviallyDead(OpI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+
+ } while (!NowDeadInsts.empty());
+}
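
The same worklist idiom, reduced to a standalone model with explicit use counts: dropping each operand reference may make the operand dead, in which case it joins the worklist, so the entire computation tree feeding the root dies in one pass:

#include <vector>

struct Inst {
  std::vector<Inst*> Ops;
  unsigned NumUses;
};

// Delete I (assumed to have no uses), zeroing its operands in stages; any
// operand whose use count hits zero is queued and deleted in turn.
void deleteDead(Inst *I) {
  std::vector<Inst*> Worklist(1, I);
  while (!Worklist.empty()) {
    Inst *Dead = Worklist.back();
    Worklist.pop_back();
    for (unsigned op = 0, e = Dead->Ops.size(); op != e; ++op) {
      Inst *Op = Dead->Ops[op];
      Dead->Ops[op] = 0;
      if (Op && --Op->NumUses == 0)   // operand just became dead
        Worklist.push_back(Op);
    }
    delete Dead;
  }
}
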
+
+bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ CurLoop = L;
+
+ // The trip count of the loop must be analyzable.
+ SE = &getAnalysis<ScalarEvolution>();
+ if (!SE->hasLoopInvariantBackedgeTakenCount(L))
+ return false;
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECount)) return false;
+
+ // If this loop executes exactly one time, then it should be peeled, not
+ // optimized by this pass.
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ if (BECst->getValue()->getValue() == 0)
+ return false;
+
+ // We require target data for now.
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (TD == 0) return false;
+
+ DT = &getAnalysis<DominatorTree>();
+ LoopInfo &LI = getAnalysis<LoopInfo>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
+ DEBUG(dbgs() << "loop-idiom Scanning: F["
+ << L->getHeader()->getParent()->getName()
+ << "] Loop %" << L->getHeader()->getName() << "\n");
+
+ bool MadeChange = false;
+ // Scan all the blocks in the loop that are not in subloops.
+ for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
+ ++BI) {
+ // Ignore blocks in subloops.
+ if (LI.getLoopFor(*BI) != CurLoop)
+ continue;
+
+ MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
+ }
+ return MadeChange;
+}
+
+/// runOnLoopBlock - Process the specified block, which lives in a counted loop
+/// with the specified backedge count. This block is known to be in the current
+/// loop and not in any subloops.
+bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ // We can only promote stores in this block if they are unconditionally
+ // executed in the loop. For a block to be unconditionally executed, it has
+ // to dominate all the exit blocks of the loop. Verify this now.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!DT->dominates(BB, ExitBlocks[i]))
+ return false;
+
+ bool MadeChange = false;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+ // Look for store instructions, which may be optimized to memset/memcpy.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ WeakVH InstPtr(I);
+ if (!processLoopStore(SI, BECount)) continue;
+ MadeChange = true;
+
+ // If processing the store invalidated our iterator, start over from the
+ // top of the block.
+ if (InstPtr == 0)
+ I = BB->begin();
+ continue;
+ }
+
+ // Look for memset instructions, which may be optimized to a larger memset.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
+ WeakVH InstPtr(I);
+ if (!processLoopMemSet(MSI, BECount)) continue;
+ MadeChange = true;
+
+ // If processing the memset invalidated our iterator, start over from the
+ // top of the block.
+ if (InstPtr == 0)
+ I = BB->begin();
+ continue;
+ }
+ }
+
+ return MadeChange;
+}
+
+
+/// processLoopStore - See if this store can be promoted to a memset or memcpy.
+bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
+ if (SI->isVolatile()) return false;
+
+ Value *StoredVal = SI->getValueOperand();
+ Value *StorePtr = SI->getPointerOperand();
+
+ // Reject stores that are so large that they overflow an unsigned.
+ uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
+ if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *StoreEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
+ return false;
+
+ // Check to see if the stride matches the size of the store. If so, then we
+ // know that every byte is touched in the loop.
+ unsigned StoreSize = (unsigned)SizeInBits >> 3;
+ const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
+
+ // TODO: Could also handle negative stride here someday; that will require
+ // the validity check in mayLoopAccessLocation to be updated, though.
+ if (Stride == 0 || StoreSize != Stride->getValue()->getValue())
+ return false;
+
+ // See if we can optimize just this store in isolation.
+ if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
+ StoredVal, SI, StoreEv, BECount))
+ return true;
+
+ // If the stored value is a strided load in the same loop with the same
+ // stride, then this may be transformable into a memcpy. This kicks in for
+ // stuff like
+ // for (i) A[i] = B[i];
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+ const SCEVAddRecExpr *LoadEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
+ if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
+ StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile())
+ if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
+ return true;
+ }
+ //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
+
+ return false;
+}
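
The size bookkeeping behind these checks in one line: a loop whose backedge is taken BECount times runs BECount+1 iterations, so a contiguous strided store of StoreSize bytes writes (BECount+1)*StoreSize bytes, the quantity expanded later as NumBytesS. A hypothetical helper:

#include <cstdint>

// Backedge taken BECount times => BECount+1 iterations. Assumes the
// arithmetic does not wrap, matching the no-unsigned-overflow flags below.
uint64_t bytesStored(uint64_t BECount, uint64_t StoreSize) {
  return (BECount + 1) * StoreSize;
}
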
+
+/// processLoopMemSet - See if this memset can be promoted to a large memset.
+bool LoopIdiomRecognize::
+processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
+ // We can only handle non-volatile memsets with a constant size.
+ if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;
+
+ // If we're not allowed to hack on memset, we fail.
+ if (!TLI->has(LibFunc::memset))
+ return false;
+
+ Value *Pointer = MSI->getDest();
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
+ if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
+ return false;
+
+ // Reject memsets that are so large that they overflow an unsigned.
+ uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ if ((SizeInBytes >> 32) != 0)
+ return false;
+
+ // Check to see if the stride matches the size of the memset. If so, then we
+ // know that every byte is touched in the loop.
+ const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+
+ // TODO: Could also handle negative stride here someday; that will require
+ // the validity check in mayLoopAccessLocation to be updated, though.
+ if (Stride == 0 || MSI->getLength() != Stride->getValue())
+ return false;
+
+ return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
+ MSI->getAlignment(), MSI->getValue(),
+ MSI, Ev, BECount);
+}
+
+
+/// mayLoopAccessLocation - Return true if the specified loop might access the
+/// specified pointer location, which is a loop-strided access. The 'Access'
+/// argument specifies what the verboten forms of access are (read or write).
+static bool mayLoopAccessLocation(Value *Ptr, AliasAnalysis::ModRefResult Access,
+ Loop *L, const SCEV *BECount,
+ unsigned StoreSize, AliasAnalysis &AA,
+ Instruction *IgnoredStore) {
+ // Get the location that may be stored across the loop. Since the access is
+ // strided positively through memory, we say that the modified location starts
+ // at the pointer and has infinite size.
+ uint64_t AccessSize = AliasAnalysis::UnknownSize;
+
+ // If the loop iterates a fixed number of times, we can refine the access size
+ // to be exactly the size of the memset, which is (BECount+1)*StoreSize.
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
+
+ // TODO: For this to be really effective, we have to dive into the pointer
+ // operand in the store. A store to &A[i] of size 100 will always return
+ // MayAlias with a store to &A[100]; we need StoreLoc to be "A" with a size
+ // of 100, which will then no-alias a store to &A[100].
+ AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
+
+ for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
+ ++BI)
+ for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
+ if (&*I != IgnoredStore &&
+ (AA.getModRefInfo(I, StoreLoc) & Access))
+ return true;
+
+ return false;
+}
+
+/// getMemSetPatternValue - If a strided store of the specified value is safe to
+/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
+/// be passed in. Otherwise, return null.
+///
+/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
+/// just replicate their input array and then pass on to memset_pattern16.
+static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
+ // If the value isn't a constant, we can't promote it to being in a constant
+ // array. We could theoretically do a store to an alloca or something, but
+ // that doesn't seem worthwhile.
+ Constant *C = dyn_cast<Constant>(V);
+ if (C == 0) return 0;
+
+ // Only handle simple values that are a power of two bytes in size.
+ uint64_t Size = TD.getTypeSizeInBits(V->getType());
+ if (Size == 0 || (Size & 7) || (Size & (Size-1)))
+ return 0;
+
+ // Don't care enough about darwin/ppc to implement this.
+ if (TD.isBigEndian())
+ return 0;
+
+ // Convert to size in bytes.
+ Size /= 8;
+
+ // TODO: If C is larger than 16 bytes, we can try slicing it in half to see
+ // if the top and bottom are the same (e.g. for vectors and large integers).
+ if (Size > 16) return 0;
+
+ // If the constant is exactly 16 bytes, just use it.
+ if (Size == 16) return C;
+
+ // Otherwise, we'll use an array of the constants.
+ unsigned ArraySize = 16/Size;
+ ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+ return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
+}
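
A standalone model of the replication rule: a power-of-two-sized constant of at most 16 bytes is tiled 16/Size times to fill the 16-byte pattern memset_pattern16 expects. The raw-byte interface here is an assumption of the sketch, not the pass's API:

#include <array>
#include <cstdint>
#include <cstring>
#include <optional>

// Tile a Size-byte little-endian constant into a 16-byte pattern, or fail
// when the constraints above (power-of-two size, at most 16 bytes) don't hold.
std::optional<std::array<uint8_t, 16> >
makePattern16(const uint8_t *Bytes, uint64_t Size) {
  if (Size == 0 || Size > 16 || (Size & (Size - 1)))
    return std::nullopt;
  std::array<uint8_t, 16> Pat;
  for (uint64_t Off = 0; Off != 16; Off += Size)   // Size divides 16
    std::memcpy(&Pat[Off], Bytes, Size);
  return Pat;
}
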
+
+
+/// processLoopStridedStore - We see a strided store of some value. If we can
+/// transform this into a memset or memset_pattern in the loop preheader, do so.
+bool LoopIdiomRecognize::
+processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment, Value *StoredVal,
+ Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount) {
+
+ // If the stored value is a byte-wise value (like i32 -1), then it may be
+ // turned into a memset of i8 -1, assuming that all the consecutive bytes
+ // are stored. A store of i32 0x01020304 can never be turned into a memset,
+ // but it can be turned into memset_pattern if the target supports it.
+ Value *SplatValue = isBytewiseValue(StoredVal);
+ Constant *PatternValue = 0;
+
+ // If we're allowed to form a memset, and the stored value would be acceptable
+ // for memset, use it.
+ if (SplatValue && TLI->has(LibFunc::memset) &&
+ // Verify that the stored value is loop invariant. If not, we can't
+ // promote the memset.
+ CurLoop->isLoopInvariant(SplatValue)) {
+ // Keep and use SplatValue.
+ PatternValue = 0;
+ } else if (TLI->has(LibFunc::memset_pattern16) &&
+ (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
+ // It looks like we can use PatternValue!
+ SplatValue = 0;
+ } else {
+ // Otherwise, this isn't an idiom we can transform; for example, we can't
+ // do anything with a 3-byte store.
+ return false;
+ }
+
+
+ // Okay, we have a strided store "p[i]" of a splattable value. We can turn
+ // this into a memset in the loop preheader now if we want. However, this
+ // would be unsafe to do if there is anything else in the loop that may read
+ // or write to the aliased location. Check for an alias.
+ if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef,
+ CurLoop, BECount,
+ StoreSize, getAnalysis<AliasAnalysis>(), TheStore))
+ return false;
+
+ // Okay, everything looks good, insert the memset.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+
+ IRBuilder<> Builder(Preheader->getTerminator());
+
+ // The trip count of the loop and the base pointer of the addrec SCEV are
+ // guaranteed to be loop invariant, which means they should dominate the
+ // header. Just insert code for them in the preheader.
+ SCEVExpander Expander(*SE);
+
+ unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
+ Value *BasePtr =
+ Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
+ Preheader->getTerminator());
+
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ true /*no unsigned overflow*/);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ true /*no unsigned overflow*/);
+
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+
+ Value *NewCall;
+ if (SplatValue)
+ NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
+ else {
+ Module *M = TheStore->getParent()->getParent()->getParent();
+ Value *MSP = M->getOrInsertFunction("memset_pattern16",
+ Builder.getVoidTy(),
+ Builder.getInt8PtrTy(),
+ Builder.getInt8PtrTy(), IntPtr,
+ (void*)0);
+
+ // Otherwise we should form a memset_pattern16. PatternValue is known to be
+ // a constant array of 16 bytes. Plop the value into a mergeable global.
+ GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
+ GlobalValue::InternalLinkage,
+ PatternValue, ".memset_pattern");
+ GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setAlignment(16);
+ Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
+ NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
+ }
+
+ DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
+ << " from store to: " << *Ev << " at: " << *TheStore << "\n");
+ (void)NewCall;
+
+ // Okay, the memset has been formed. Zap the original store and anything that
+ // feeds into it.
+ DeleteDeadInstruction(TheStore, *SE);
+ ++NumMemSet;
+ return true;
+}
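+
+// End-to-end sketch of the transformation above (function and variable names
+// are illustrative, not from this patch): the source loop
+//
+//   void zero(char *p, unsigned n) {
+//     for (unsigned i = 0; i != n; ++i)
+//       p[i] = 0;                 // strided store of a byte-wise value
+//   }
+//
+// has BECount = n-1 and StoreSize = 1, so NumBytes = (BECount+1)*1 = n and
+// the loop body collapses to a single preheader call:
+//
+//   memset(p, 0, n);
+//
+// after which DeleteDeadInstruction removes the now-dead store.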
+
+/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
+/// same-strided load.
+bool LoopIdiomRecognize::
+processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv,
+ const SCEV *BECount) {
+ // If we're not allowed to form memcpy, we fail.
+ if (!TLI->has(LibFunc::memcpy))
+ return false;
+
+ LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+
+ // Okay, we have a strided store "p[i]" of a loaded value. We can turn
+ // this into a memcpy in the loop preheader now if we want. However, this
+ // would be unsafe to do if there is anything else in the loop that may read
+ // or write to the stored location (including the load feeding the stores).
+ // Check for an alias.
+ if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef,
+ CurLoop, BECount, StoreSize,
+ getAnalysis<AliasAnalysis>(), SI))
+ return false;
+
+ // For a memcpy, we have to make sure that the input array is not being
+ // mutated by the loop.
+ if (mayLoopAccessLocation(LI->getPointerOperand(), AliasAnalysis::Mod,
+ CurLoop, BECount, StoreSize,
+ getAnalysis<AliasAnalysis>(), SI))
+ return false;
+
+ // Okay, everything looks good, insert the memcpy.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+
+ IRBuilder<> Builder(Preheader->getTerminator());
+
+ // The trip count of the loop and the base pointer of the addrec SCEV are
+ // guaranteed to be loop invariant, which means they should dominate the
+ // header. Just insert code for them in the preheader.
+ SCEVExpander Expander(*SE);
+
+ Value *LoadBasePtr =
+ Expander.expandCodeFor(LoadEv->getStart(),
+ Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
+ Preheader->getTerminator());
+ Value *StoreBasePtr =
+ Expander.expandCodeFor(StoreEv->getStart(),
+ Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
+ Preheader->getTerminator());
+
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ const Type *IntPtr = TD->getIntPtrType(SI->getContext());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ true /*no unsigned overflow*/);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ true /*no unsigned overflow*/);
+
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+
+ Value *NewCall =
+ Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
+ std::min(SI->getAlignment(), LI->getAlignment()));
+
+ DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
+ << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
+ << " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
+ (void)NewCall;
+
+ // Okay, the memcpy has been formed. Zap the original store and anything that
+ // feeds into it.
+ DeleteDeadInstruction(SI, *SE);
+ ++NumMemCpy;
+ return true;
+}
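+
+// A matching sketch for the memcpy idiom (names illustrative): the loop
+//
+//   for (unsigned i = 0; i != n; ++i)
+//     dst[i] = src[i];            // same-strided load feeding the store
+//
+// becomes a single preheader call, memcpy(dst, src, n * sizeof(*dst)),
+// provided the two alias checks above prove that nothing else in the loop
+// reads or writes either location.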
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp
deleted file mode 100644
index a433674..0000000
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ /dev/null
@@ -1,1270 +0,0 @@
-//===- LoopIndexSplit.cpp - Loop Index Splitting Pass ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Loop Index Splitting Pass. This pass handles three
-// kinds of loops.
-//
-// [1] A loop may be eliminated if the body is executed exactly once.
-// For example,
-//
-// for (i = 0; i < N; ++i) {
-// if (i == X) {
-// body;
-// }
-// }
-//
-// is transformed to
-//
-// i = X;
-// body;
-//
-// [2] A loop's iteration space may be shrunk if the loop body is executed
-// for a proper sub-range of the loop's iteration space. For example,
-//
-// for (i = 0; i < N; ++i) {
-// if (i > A && i < B) {
-// ...
-// }
-// }
-//
-// is transformed to iterate from A to B, if A > 0 and B < N.
-//
-// [3] A loop may be split if the loop body is dominated by a branch.
-// For example,
-//
-// for (i = LB; i < UB; ++i) { if (i < SV) A; else B; }
-//
-// is transformed into
-//
-// AEV = BSV = SV
-// for (i = LB; i < min(UB, AEV); ++i)
-// A;
-// for (i = max(LB, BSV); i < UB; ++i)
-// B;
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "loop-index-split"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/Statistic.h"
-
-using namespace llvm;
-
-STATISTIC(NumIndexSplit, "Number of loops index split");
-STATISTIC(NumIndexSplitRemoved, "Number of loops eliminated by loop index split");
-STATISTIC(NumRestrictBounds, "Number of loop iteration spaces restricted");
-
-namespace {
-
- class LoopIndexSplit : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopIndexSplit() : LoopPass(ID) {}
-
- // Index split Loop L. Return true if loop is split.
- bool runOnLoop(Loop *L, LPPassManager &LPM);
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<ScalarEvolution>();
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequired<DominatorTree>();
- AU.addRequired<DominanceFrontier>();
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
- }
-
- private:
- /// processOneIterationLoop -- Eliminate loop if loop body is executed
- /// only once. For example,
- /// for (i = 0; i < N; ++i) {
- /// if ( i == X) {
- /// ...
- /// }
- /// }
- ///
- bool processOneIterationLoop();
-
- // -- Routines used by updateLoopIterationSpace();
-
- /// updateLoopIterationSpace -- Update loop's iteration space if loop
- /// body is executed for certain IV range only. For example,
- ///
- /// for (i = 0; i < N; ++i) {
- /// if ( i > A && i < B) {
- /// ...
- /// }
- /// }
- /// is transformed to iterate from A to B, if A > 0 and B < N.
- ///
- bool updateLoopIterationSpace();
-
- /// restrictLoopBound - Op dominates loop body. Op compares an IV based value
- /// with a loop invariant value. Update loop's lower and upper bound based on
- /// the loop invariant value.
- bool restrictLoopBound(ICmpInst &Op);
-
- // --- Routines used by splitLoop(). --- /
-
- bool splitLoop();
-
- /// removeBlocks - Remove basic block DeadBB and all blocks dominated by
- /// DeadBB. This routine is used to remove split condition's dead branch,
- /// dominated by DeadBB. LiveBB dominates split condition's other branch.
- void removeBlocks(BasicBlock *DeadBB, Loop *LP, BasicBlock *LiveBB);
-
- /// moveExitCondition - Move exit condition EC into split condition block.
- void moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
- BasicBlock *ExitBB, ICmpInst *EC, ICmpInst *SC,
- PHINode *IV, Instruction *IVAdd, Loop *LP,
- unsigned);
-
- /// updatePHINodes - CFG has been changed.
- /// Before
- /// - ExitBB's single predecessor was Latch
- /// - Latch's second successor was Header
- /// Now
- /// - ExitBB's single predecessor is Header
- /// - Latch's one and only successor is Header
- ///
- /// Update ExitBB PHINodes' to reflect this change.
- void updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch,
- BasicBlock *Header,
- PHINode *IV, Instruction *IVIncrement, Loop *LP);
-
- // --- Utility routines --- /
-
- /// cleanBlock - A block is considered clean if all non terminal
- /// instructions are either PHINodes or IV based values.
- bool cleanBlock(BasicBlock *BB);
-
- /// IVisLT - If Op is comparing an IV based value with a loop invariant and
- /// IV based value is less than the loop invariant then return the loop
- /// invariant. Otherwise return NULL.
- Value * IVisLT(ICmpInst &Op);
-
- /// IVisLE - If Op is comparing an IV based value with a loop invariant and
- /// IV based value is less than or equal to the loop invariant then
- /// return the loop invariant. Otherwise return NULL.
- Value * IVisLE(ICmpInst &Op);
-
- /// IVisGT - If Op is comparing an IV based value with a loop invariant and
- /// IV based value is greater than the loop invariant then return the loop
- /// invariant. Otherwise return NULL.
- Value * IVisGT(ICmpInst &Op);
-
- /// IVisGE - If Op is comparing an IV based value with a loop invariant and
- /// IV based value is greater than or equal to the loop invariant then
- /// return the loop invariant. Otherwise return NULL.
- Value * IVisGE(ICmpInst &Op);
-
- private:
-
- // Current Loop information.
- Loop *L;
- LPPassManager *LPM;
- LoopInfo *LI;
- DominatorTree *DT;
- DominanceFrontier *DF;
-
- PHINode *IndVar;
- ICmpInst *ExitCondition;
- ICmpInst *SplitCondition;
- Value *IVStartValue;
- Value *IVExitValue;
- Instruction *IVIncrement;
- SmallPtrSet<Value *, 4> IVBasedValues;
- };
-}
-
-char LoopIndexSplit::ID = 0;
-INITIALIZE_PASS(LoopIndexSplit, "loop-index-split",
- "Index Split Loops", false, false);
-
-Pass *llvm::createLoopIndexSplitPass() {
- return new LoopIndexSplit();
-}
-
-// Index split Loop L. Return true if loop is split.
-bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) {
- L = IncomingLoop;
- LPM = &LPM_Ref;
-
- // If LoopSimplify form is not available, stay out of trouble.
- if (!L->isLoopSimplifyForm())
- return false;
-
- // FIXME - Nested loops make dominator info updates tricky.
- if (!L->getSubLoops().empty())
- return false;
-
- DT = &getAnalysis<DominatorTree>();
- LI = &getAnalysis<LoopInfo>();
- DF = &getAnalysis<DominanceFrontier>();
-
- // Initialize loop data.
- IndVar = L->getCanonicalInductionVariable();
- if (!IndVar) return false;
-
- bool P1InLoop = L->contains(IndVar->getIncomingBlock(1));
- IVStartValue = IndVar->getIncomingValue(!P1InLoop);
- IVIncrement = dyn_cast<Instruction>(IndVar->getIncomingValue(P1InLoop));
- if (!IVIncrement) return false;
-
- IVBasedValues.clear();
- IVBasedValues.insert(IndVar);
- IVBasedValues.insert(IVIncrement);
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I)
- for(BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end();
- BI != BE; ++BI) {
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BI))
- if (BO != IVIncrement
- && (BO->getOpcode() == Instruction::Add
- || BO->getOpcode() == Instruction::Sub))
- if (IVBasedValues.count(BO->getOperand(0))
- && L->isLoopInvariant(BO->getOperand(1)))
- IVBasedValues.insert(BO);
- }
-
- // Reject loop if loop exit condition is not suitable.
- BasicBlock *ExitingBlock = L->getExitingBlock();
- if (!ExitingBlock)
- return false;
- BranchInst *EBR = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!EBR) return false;
- ExitCondition = dyn_cast<ICmpInst>(EBR->getCondition());
- if (!ExitCondition) return false;
- if (ExitingBlock != L->getLoopLatch()) return false;
- IVExitValue = ExitCondition->getOperand(1);
- if (!L->isLoopInvariant(IVExitValue))
- IVExitValue = ExitCondition->getOperand(0);
- if (!L->isLoopInvariant(IVExitValue))
- return false;
- if (!IVBasedValues.count(
- ExitCondition->getOperand(IVExitValue == ExitCondition->getOperand(0))))
- return false;
-
- // If the start value is greater than the exit value and the induction variable
- // increments by 1 then we are potentially dealing with an infinite loop.
- // Do not index split this loop.
- if (ConstantInt *SV = dyn_cast<ConstantInt>(IVStartValue))
- if (ConstantInt *EV = dyn_cast<ConstantInt>(IVExitValue))
- if (SV->getSExtValue() > EV->getSExtValue())
- return false;
-
- if (processOneIterationLoop())
- return true;
-
- if (updateLoopIterationSpace())
- return true;
-
- if (splitLoop())
- return true;
-
- return false;
-}
-
-// --- Helper routines ---
-// isUsedOutsideLoop - Returns true iff V is used outside the loop L.
-static bool isUsedOutsideLoop(Value *V, Loop *L) {
- for(Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
- if (!L->contains(cast<Instruction>(*UI)))
- return true;
- return false;
-}
-
-// Return V+1
-static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt,
- LLVMContext &Context) {
- Constant *One = ConstantInt::get(V->getType(), 1, Sign);
- return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
-}
-
-// Return V-1
-static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt,
- LLVMContext &Context) {
- Constant *One = ConstantInt::get(V->getType(), 1, Sign);
- return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
-}
-
-// Return min(V1, V2)
-static Value *getMin(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
-
- Value *C = new ICmpInst(InsertPt,
- Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- V1, V2, "lsp");
- return SelectInst::Create(C, V1, V2, "lsp", InsertPt);
-}
-
-// Return max(V1, V2)
-static Value *getMax(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
-
- Value *C = new ICmpInst(InsertPt,
- Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- V1, V2, "lsp");
- return SelectInst::Create(C, V2, V1, "lsp", InsertPt);
-}
-
-/// processOneIterationLoop -- Eliminate loop if loop body is executed
-/// only once. For example,
-/// for (i = 0; i < N; ++i) {
-/// if ( i == X) {
-/// ...
-/// }
-/// }
-///
-bool LoopIndexSplit::processOneIterationLoop() {
- SplitCondition = NULL;
- BasicBlock *Latch = L->getLoopLatch();
- BasicBlock *Header = L->getHeader();
- BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
- if (!BR) return false;
- if (!isa<BranchInst>(Latch->getTerminator())) return false;
- if (BR->isUnconditional()) return false;
- SplitCondition = dyn_cast<ICmpInst>(BR->getCondition());
- if (!SplitCondition) return false;
- if (SplitCondition == ExitCondition) return false;
- if (SplitCondition->getPredicate() != ICmpInst::ICMP_EQ) return false;
- if (BR->getOperand(1) != Latch) return false;
- if (!IVBasedValues.count(SplitCondition->getOperand(0))
- && !IVBasedValues.count(SplitCondition->getOperand(1)))
- return false;
-
- // If IV is used outside the loop then this loop traversal is required.
- // FIXME: Calculate and use last IV value.
- if (isUsedOutsideLoop(IVIncrement, L))
- return false;
-
- // If BR operands are not IV or not loop invariants then skip this loop.
- Value *OPV = SplitCondition->getOperand(0);
- Value *SplitValue = SplitCondition->getOperand(1);
- if (!L->isLoopInvariant(SplitValue))
- std::swap(OPV, SplitValue);
- if (!L->isLoopInvariant(SplitValue))
- return false;
- Instruction *OPI = dyn_cast<Instruction>(OPV);
- if (!OPI)
- return false;
- if (OPI->getParent() != Header || isUsedOutsideLoop(OPI, L))
- return false;
- Value *StartValue = IVStartValue;
- Value *ExitValue = IVExitValue;
-
- if (OPV != IndVar) {
- // If BR operand is IV based then use this operand to calculate
- // effective conditions for loop body.
- BinaryOperator *BOPV = dyn_cast<BinaryOperator>(OPV);
- if (!BOPV)
- return false;
- if (BOPV->getOpcode() != Instruction::Add)
- return false;
- StartValue = BinaryOperator::CreateAdd(OPV, StartValue, "" , BR);
- ExitValue = BinaryOperator::CreateAdd(OPV, ExitValue, "" , BR);
- }
-
- if (!cleanBlock(Header))
- return false;
-
- if (!cleanBlock(Latch))
- return false;
-
- // If the merge point for BR is not loop latch then skip this loop.
- if (BR->getSuccessor(0) != Latch) {
- DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
- assert (DF0 != DF->end() && "Unable to find dominance frontier");
- if (!DF0->second.count(Latch))
- return false;
- }
-
- if (BR->getSuccessor(1) != Latch) {
- DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
- assert (DF1 != DF->end() && "Unable to find dominance frontier");
- if (!DF1->second.count(Latch))
- return false;
- }
-
- // Now the current loop, L, contains a compare instruction
- // that compares the induction variable, IndVar, against a loop
- // invariant, and the entire (i.e. meaningful) loop body is dominated
- // by this compare instruction. In such cases, eliminate the
- // loop structure surrounding this loop body. For example,
- // for (int i = start; i < end; ++i) {
- // if ( i == somevalue) {
- // loop_body
- // }
- // }
- // can be transformed into
- // if (somevalue >= start && somevalue < end) {
- // i = somevalue;
- // loop_body
- // }
-
- // Replace index variable with split value in loop body. Loop body is executed
- // only when index variable is equal to split value.
- IndVar->replaceAllUsesWith(SplitValue);
-
- // Replace split condition in header.
- // Transform
- // SplitCondition : icmp eq i32 IndVar, SplitValue
- // into
- // c1 = icmp uge i32 SplitValue, StartValue
- // c2 = icmp ult i32 SplitValue, ExitValue
- // and i32 c1, c2
- Instruction *C1 = new ICmpInst(BR, ExitCondition->isSigned() ?
- ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
- SplitValue, StartValue, "lisplit");
-
- CmpInst::Predicate C2P = ExitCondition->getPredicate();
- BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- if (LatchBR->getOperand(1) != Header)
- C2P = CmpInst::getInversePredicate(C2P);
- Instruction *C2 = new ICmpInst(BR, C2P, SplitValue, ExitValue, "lisplit");
- Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR);
-
- SplitCondition->replaceAllUsesWith(NSplitCond);
- SplitCondition->eraseFromParent();
-
- // Remove Latch to Header edge.
- BasicBlock *LatchSucc = NULL;
- Header->removePredecessor(Latch);
- for (succ_iterator SI = succ_begin(Latch), E = succ_end(Latch);
- SI != E; ++SI) {
- if (Header != *SI)
- LatchSucc = *SI;
- }
-
- // Clean up latch block.
- Value *LatchBRCond = LatchBR->getCondition();
- LatchBR->setUnconditionalDest(LatchSucc);
- RecursivelyDeleteTriviallyDeadInstructions(LatchBRCond);
-
- LPM->deleteLoopFromQueue(L);
-
- // Update Dominator Info.
- // Only CFG change done is to remove Latch to Header edge. This
- // does not change dominator tree because Latch did not dominate
- // Header.
- if (DF) {
- DominanceFrontier::iterator HeaderDF = DF->find(Header);
- if (HeaderDF != DF->end())
- DF->removeFromFrontier(HeaderDF, Header);
-
- DominanceFrontier::iterator LatchDF = DF->find(Latch);
- if (LatchDF != DF->end())
- DF->removeFromFrontier(LatchDF, Header);
- }
-
- ++NumIndexSplitRemoved;
- return true;
-}
-
-/// restrictLoopBound - Op dominates loop body. Op compares an IV based value
-/// with a loop invariant value. Update loop's lower and upper bound based on
-/// the loop invariant value.
-bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) {
- bool Sign = Op.isSigned();
- Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
-
- if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
- BranchInst *EBR =
- cast<BranchInst>(ExitCondition->getParent()->getTerminator());
- ExitCondition->setPredicate(ExitCondition->getInversePredicate());
- BasicBlock *T = EBR->getSuccessor(0);
- EBR->setSuccessor(0, EBR->getSuccessor(1));
- EBR->setSuccessor(1, T);
- }
-
- LLVMContext &Context = Op.getContext();
-
- // New upper and lower bounds.
- Value *NLB = NULL;
- Value *NUB = NULL;
- if (Value *V = IVisLT(Op)) {
- // Restrict upper bound.
- if (IVisLE(*ExitCondition))
- V = getMinusOne(V, Sign, PHTerm, Context);
- NUB = getMin(V, IVExitValue, Sign, PHTerm);
- } else if (Value *V = IVisLE(Op)) {
- // Restrict upper bound.
- if (IVisLT(*ExitCondition))
- V = getPlusOne(V, Sign, PHTerm, Context);
- NUB = getMin(V, IVExitValue, Sign, PHTerm);
- } else if (Value *V = IVisGT(Op)) {
- // Restrict lower bound.
- V = getPlusOne(V, Sign, PHTerm, Context);
- NLB = getMax(V, IVStartValue, Sign, PHTerm);
- } else if (Value *V = IVisGE(Op))
- // Restrict lower bound.
- NLB = getMax(V, IVStartValue, Sign, PHTerm);
-
- if (!NLB && !NUB)
- return false;
-
- if (NLB) {
- unsigned i = IndVar->getBasicBlockIndex(L->getLoopPreheader());
- IndVar->setIncomingValue(i, NLB);
- }
-
- if (NUB) {
- unsigned i = (ExitCondition->getOperand(0) != IVExitValue);
- ExitCondition->setOperand(i, NUB);
- }
- return true;
-}
-
-/// updateLoopIterationSpace -- Update loop's iteration space if loop
-/// body is executed for certain IV range only. For example,
-///
-/// for (i = 0; i < N; ++i) {
-/// if ( i > A && i < B) {
-/// ...
-/// }
-/// }
-/// is transformed to iterate from A to B, if A > 0 and B < N.
-///
-bool LoopIndexSplit::updateLoopIterationSpace() {
- SplitCondition = NULL;
- if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
- || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
- return false;
- BasicBlock *Latch = L->getLoopLatch();
- BasicBlock *Header = L->getHeader();
- BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
- if (!BR) return false;
- if (!isa<BranchInst>(Latch->getTerminator())) return false;
- if (BR->isUnconditional()) return false;
- BinaryOperator *AND = dyn_cast<BinaryOperator>(BR->getCondition());
- if (!AND) return false;
- if (AND->getOpcode() != Instruction::And) return false;
- ICmpInst *Op0 = dyn_cast<ICmpInst>(AND->getOperand(0));
- ICmpInst *Op1 = dyn_cast<ICmpInst>(AND->getOperand(1));
- if (!Op0 || !Op1)
- return false;
- IVBasedValues.insert(AND);
- IVBasedValues.insert(Op0);
- IVBasedValues.insert(Op1);
- if (!cleanBlock(Header)) return false;
- BasicBlock *ExitingBlock = ExitCondition->getParent();
- if (!cleanBlock(ExitingBlock)) return false;
-
- // If the merge point for BR is not loop latch then skip this loop.
- if (BR->getSuccessor(0) != Latch) {
- DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
- assert (DF0 != DF->end() && "Unable to find dominance frontier");
- if (!DF0->second.count(Latch))
- return false;
- }
-
- if (BR->getSuccessor(1) != Latch) {
- DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
- assert (DF1 != DF->end() && "Unable to find dominance frontier");
- if (!DF1->second.count(Latch))
- return false;
- }
-
- // Verify that the loop exiting block has only two predecessors, where one
- // is the split condition block. The other predecessor will become the
- // exiting block's dominator after the CFG is updated. TODO: Handle CFGs
- // where the exiting block has more than two predecessors; this requires
- // extra work in updating dominator information.
- BasicBlock *ExitingBBPred = NULL;
- for (pred_iterator PI = pred_begin(ExitingBlock), PE = pred_end(ExitingBlock);
- PI != PE; ++PI) {
- BasicBlock *BB = *PI;
- if (Header == BB)
- continue;
- if (ExitingBBPred)
- return false;
- else
- ExitingBBPred = BB;
- }
-
- if (!restrictLoopBound(*Op0))
- return false;
-
- if (!restrictLoopBound(*Op1))
- return false;
-
- // Update CFG.
- if (BR->getSuccessor(0) == ExitingBlock)
- BR->setUnconditionalDest(BR->getSuccessor(1));
- else
- BR->setUnconditionalDest(BR->getSuccessor(0));
-
- AND->eraseFromParent();
- if (Op0->use_empty())
- Op0->eraseFromParent();
- if (Op1->use_empty())
- Op1->eraseFromParent();
-
- // Update dominator info. Now, ExitingBlock has only one predecessor,
- // ExitingBBPred, and it is ExitingBlock's immediate dominator.
- DT->changeImmediateDominator(ExitingBlock, ExitingBBPred);
-
- BasicBlock *ExitBlock = ExitingBlock->getTerminator()->getSuccessor(1);
- if (L->contains(ExitBlock))
- ExitBlock = ExitingBlock->getTerminator()->getSuccessor(0);
-
- // If ExitingBlock is a member of the loop basic blocks' DF list then
- // replace ExitingBlock with header and exit block in the DF list
- DominanceFrontier::iterator ExitingBlockDF = DF->find(ExitingBlock);
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- if (BB == Header || BB == ExitingBlock)
- continue;
- DominanceFrontier::iterator BBDF = DF->find(BB);
- DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
- DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
- while (DomSetI != DomSetE) {
- DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
- ++DomSetI;
- BasicBlock *DFBB = *CurrentItr;
- if (DFBB == ExitingBlock) {
- BBDF->second.erase(DFBB);
- for (DominanceFrontier::DomSetType::iterator
- EBI = ExitingBlockDF->second.begin(),
- EBE = ExitingBlockDF->second.end(); EBI != EBE; ++EBI)
- BBDF->second.insert(*EBI);
- }
- }
- }
- ++NumRestrictBounds;
- return true;
-}
-
-/// removeBlocks - Remove basic block DeadBB and all blocks dominated by DeadBB.
-/// This routine is used to remove split condition's dead branch, dominated by
-/// DeadBB. LiveBB dominates split condition's other branch.
-void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,
- BasicBlock *LiveBB) {
-
- // First update DeadBB's dominance frontier.
- SmallVector<BasicBlock *, 8> FrontierBBs;
- DominanceFrontier::iterator DeadBBDF = DF->find(DeadBB);
- if (DeadBBDF != DF->end()) {
- SmallVector<BasicBlock *, 8> PredBlocks;
-
- DominanceFrontier::DomSetType DeadBBSet = DeadBBDF->second;
- for (DominanceFrontier::DomSetType::iterator DeadBBSetI = DeadBBSet.begin(),
- DeadBBSetE = DeadBBSet.end(); DeadBBSetI != DeadBBSetE; ++DeadBBSetI)
- {
- BasicBlock *FrontierBB = *DeadBBSetI;
- FrontierBBs.push_back(FrontierBB);
-
- // Remove any PHI incoming edge from blocks dominated by DeadBB.
- PredBlocks.clear();
- for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (DT->dominates(DeadBB, P))
- PredBlocks.push_back(P);
- }
-
- for(BasicBlock::iterator FBI = FrontierBB->begin(), FBE = FrontierBB->end();
- FBI != FBE; ++FBI) {
- if (PHINode *PN = dyn_cast<PHINode>(FBI)) {
- for(SmallVector<BasicBlock *, 8>::iterator PI = PredBlocks.begin(),
- PE = PredBlocks.end(); PI != PE; ++PI) {
- BasicBlock *P = *PI;
- PN->removeIncomingValue(P);
- }
- }
- else
- break;
- }
- }
- }
-
- // Now remove DeadBB and all nodes dominated by DeadBB in df order.
- SmallVector<BasicBlock *, 32> WorkList;
- DomTreeNode *DN = DT->getNode(DeadBB);
- for (df_iterator<DomTreeNode*> DI = df_begin(DN),
- E = df_end(DN); DI != E; ++DI) {
- BasicBlock *BB = DI->getBlock();
- WorkList.push_back(BB);
- BB->replaceAllUsesWith(UndefValue::get(
- Type::getLabelTy(DeadBB->getContext())));
- }
-
- while (!WorkList.empty()) {
- BasicBlock *BB = WorkList.pop_back_val();
- LPM->deleteSimpleAnalysisValue(BB, LP);
- for(BasicBlock::iterator BBI = BB->begin(), BBE = BB->end();
- BBI != BBE; ) {
- Instruction *I = BBI;
- ++BBI;
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- LPM->deleteSimpleAnalysisValue(I, LP);
- I->eraseFromParent();
- }
- DT->eraseNode(BB);
- DF->removeBlock(BB);
- LI->removeBlock(BB);
- BB->eraseFromParent();
- }
-
- // Update Frontier BBs' dominator info.
- while (!FrontierBBs.empty()) {
- BasicBlock *FBB = FrontierBBs.pop_back_val();
- BasicBlock *NewDominator = FBB->getSinglePredecessor();
- if (!NewDominator) {
- pred_iterator PI = pred_begin(FBB), PE = pred_end(FBB);
- NewDominator = *PI;
- ++PI;
- if (NewDominator != LiveBB) {
- for(; PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (P == LiveBB) {
- NewDominator = LiveBB;
- break;
- }
- NewDominator = DT->findNearestCommonDominator(NewDominator, P);
- }
- }
- }
- assert (NewDominator && "Unable to fix dominator info.");
- DT->changeImmediateDominator(FBB, NewDominator);
- DF->changeImmediateDominator(FBB, NewDominator, DT);
- }
-
-}
-
-// moveExitCondition - Move exit condition EC into split condition block CondBB.
-void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
- BasicBlock *ExitBB, ICmpInst *EC,
- ICmpInst *SC, PHINode *IV,
- Instruction *IVAdd, Loop *LP,
- unsigned ExitValueNum) {
-
- BasicBlock *ExitingBB = EC->getParent();
- Instruction *CurrentBR = CondBB->getTerminator();
-
- // Move exit condition into split condition block.
- EC->moveBefore(CurrentBR);
- EC->setOperand(ExitValueNum == 0 ? 1 : 0, IV);
-
- // Move exiting block's branch into split condition block. Update its branch
- // destination.
- BranchInst *ExitingBR = cast<BranchInst>(ExitingBB->getTerminator());
- ExitingBR->moveBefore(CurrentBR);
- BasicBlock *OrigDestBB = NULL;
- if (ExitingBR->getSuccessor(0) == ExitBB) {
- OrigDestBB = ExitingBR->getSuccessor(1);
- ExitingBR->setSuccessor(1, ActiveBB);
- }
- else {
- OrigDestBB = ExitingBR->getSuccessor(0);
- ExitingBR->setSuccessor(0, ActiveBB);
- }
-
- // Remove split condition and current split condition branch.
- SC->eraseFromParent();
- CurrentBR->eraseFromParent();
-
- // Connect exiting block to original destination.
- BranchInst::Create(OrigDestBB, ExitingBB);
-
- // Update PHINodes
- updatePHINodes(ExitBB, ExitingBB, CondBB, IV, IVAdd, LP);
-
- // Fix dominator info.
- // ExitBB is now dominated by CondBB
- DT->changeImmediateDominator(ExitBB, CondBB);
- DF->changeImmediateDominator(ExitBB, CondBB, DT);
-
- // Blocks outside the loop may have been in the dominance frontier of blocks
- // inside the condition; this is now impossible because the blocks inside the
- // condition no longer dominate the exit. Remove the relevant blocks from
- // the dominance frontiers.
- for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end();
- I != E; ++I) {
- if (!DT->properlyDominates(CondBB, *I)) continue;
- DominanceFrontier::iterator BBDF = DF->find(*I);
- DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
- DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
- while (DomSetI != DomSetE) {
- DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
- ++DomSetI;
- BasicBlock *DFBB = *CurrentItr;
- if (!LP->contains(DFBB))
- BBDF->second.erase(DFBB);
- }
- }
-}
-
-/// updatePHINodes - CFG has been changed.
-/// Before
-/// - ExitBB's single predecessor was Latch
-/// - Latch's second successor was Header
-/// Now
-/// - ExitBB's single predecessor is Header
-/// - Latch's one and only successor is Header
-///
-/// Update ExitBB PHINodes' to reflect this change.
-void LoopIndexSplit::updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch,
- BasicBlock *Header,
- PHINode *IV, Instruction *IVIncrement,
- Loop *LP) {
-
- for (BasicBlock::iterator BI = ExitBB->begin(), BE = ExitBB->end();
- BI != BE; ) {
- PHINode *PN = dyn_cast<PHINode>(BI);
- ++BI;
- if (!PN)
- break;
-
- Value *V = PN->getIncomingValueForBlock(Latch);
- if (PHINode *PHV = dyn_cast<PHINode>(V)) {
- // PHV is in Latch. PHV has one use in an ExitBB PHINode and one use
- // in Header, which is the new incoming value for PN.
- Value *NewV = NULL;
- for (Value::use_iterator UI = PHV->use_begin(), E = PHV->use_end();
- UI != E; ++UI)
- if (PHINode *U = dyn_cast<PHINode>(*UI))
- if (LP->contains(U)) {
- NewV = U;
- break;
- }
-
- // Add incoming value from header only if PN has any use inside the loop.
- if (NewV)
- PN->addIncoming(NewV, Header);
-
- } else if (Instruction *PHI = dyn_cast<Instruction>(V)) {
- // If this instruction is IVIncrement then IV is the new incoming value
- // from the header; otherwise this instruction must itself be the incoming
- // value from the header because the loop is in LCSSA form.
- if (PHI == IVIncrement)
- PN->addIncoming(IV, Header);
- else
- PN->addIncoming(V, Header);
- } else
- // Otherwise this is an incoming value from header because loop is in
- // LCSSA form.
- PN->addIncoming(V, Header);
-
- // Remove incoming value from Latch.
- PN->removeIncomingValue(Latch);
- }
-}
-
-bool LoopIndexSplit::splitLoop() {
- SplitCondition = NULL;
- if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
- || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
- return false;
- BasicBlock *Header = L->getHeader();
- BasicBlock *Latch = L->getLoopLatch();
- BranchInst *SBR = NULL; // Split Condition Branch
- BranchInst *EBR = cast<BranchInst>(ExitCondition->getParent()->getTerminator());
- // If Exiting block includes loop variant instructions then this
- // loop may not be split safely.
- BasicBlock *ExitingBlock = ExitCondition->getParent();
- if (!cleanBlock(ExitingBlock)) return false;
-
- LLVMContext &Context = Header->getContext();
-
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator());
- if (!BR || BR->isUnconditional()) continue;
- ICmpInst *CI = dyn_cast<ICmpInst>(BR->getCondition());
- if (!CI || CI == ExitCondition
- || CI->getPredicate() == ICmpInst::ICMP_NE
- || CI->getPredicate() == ICmpInst::ICMP_EQ)
- continue;
-
- // Unable to handle triangle loops at the moment.
- // In a triangle loop, the split condition is in the header and one of
- // the split destinations is the loop latch. If the split condition is EQ
- // then such loops are already handled in processOneIterationLoop().
- if (Header == (*I)
- && (Latch == BR->getSuccessor(0) || Latch == BR->getSuccessor(1)))
- continue;
-
- // If the block does not dominate the latch then this is not a diamond.
- // Such loops may not benefit from index splitting.
- if (!DT->dominates((*I), Latch))
- continue;
-
- // If the split condition branch's successors do not have a single
- // predecessor (SplitCondBlock), it is not possible to remove the inactive branch.
- if (!BR->getSuccessor(0)->getSinglePredecessor()
- || !BR->getSuccessor(1)->getSinglePredecessor())
- return false;
-
- // If the merge point for BR is not loop latch then skip this condition.
- if (BR->getSuccessor(0) != Latch) {
- DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
- assert (DF0 != DF->end() && "Unable to find dominance frontier");
- if (!DF0->second.count(Latch))
- continue;
- }
-
- if (BR->getSuccessor(1) != Latch) {
- DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
- assert (DF1 != DF->end() && "Unable to find dominance frontier");
- if (!DF1->second.count(Latch))
- continue;
- }
- SplitCondition = CI;
- SBR = BR;
- break;
- }
-
- if (!SplitCondition)
- return false;
-
- // If the predicate sign does not match then skip.
- if (ExitCondition->isSigned() != SplitCondition->isSigned())
- return false;
-
- unsigned EVOpNum = (ExitCondition->getOperand(1) == IVExitValue);
- unsigned SVOpNum = IVBasedValues.count(SplitCondition->getOperand(0));
- Value *SplitValue = SplitCondition->getOperand(SVOpNum);
- if (!L->isLoopInvariant(SplitValue))
- return false;
- if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum)))
- return false;
-
- // Check for side effects.
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
-
- assert(DT->dominates(Header, BB));
- if (DT->properlyDominates(SplitCondition->getParent(), BB))
- continue;
-
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE; ++BI) {
- Instruction *Inst = BI;
-
- if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst)
- && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
- return false;
- }
- }
-
- // Normalize loop conditions so that it is easier to calculate new loop
- // bounds.
- if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
- ExitCondition->setPredicate(ExitCondition->getInversePredicate());
- BasicBlock *T = EBR->getSuccessor(0);
- EBR->setSuccessor(0, EBR->getSuccessor(1));
- EBR->setSuccessor(1, T);
- }
-
- if (IVisGT(*SplitCondition) || IVisGE(*SplitCondition)) {
- SplitCondition->setPredicate(SplitCondition->getInversePredicate());
- BasicBlock *T = SBR->getSuccessor(0);
- SBR->setSuccessor(0, SBR->getSuccessor(1));
- SBR->setSuccessor(1, T);
- }
-
- //[*] Calculate new loop bounds.
- Value *AEV = SplitValue;
- Value *BSV = SplitValue;
- bool Sign = SplitCondition->isSigned();
- Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
-
- if (IVisLT(*ExitCondition)) {
- if (IVisLT(*SplitCondition)) {
- /* Do nothing */
- }
- else if (IVisLE(*SplitCondition)) {
- AEV = getPlusOne(SplitValue, Sign, PHTerm, Context);
- BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
- } else {
- assert (0 && "Unexpected split condition!");
- }
- }
- else if (IVisLE(*ExitCondition)) {
- if (IVisLT(*SplitCondition)) {
- AEV = getMinusOne(SplitValue, Sign, PHTerm, Context);
- }
- else if (IVisLE(*SplitCondition)) {
- BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
- } else {
- assert (0 && "Unexpected split condition!");
- }
- } else {
- assert (0 && "Unexpected exit condition!");
- }
- AEV = getMin(AEV, IVExitValue, Sign, PHTerm);
- BSV = getMax(BSV, IVStartValue, Sign, PHTerm);
-
- // [*] Clone Loop
- ValueMap<const Value *, Value *> VMap;
- Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this);
- Loop *ALoop = L;
-
- // [*] ALoop's exiting edge enters BLoop's header.
- // ALoop's original exit block becomes BLoop's exit block.
- PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]);
- BasicBlock *A_ExitingBlock = ExitCondition->getParent();
- BranchInst *A_ExitInsn =
- dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
- assert (A_ExitInsn && "Unable to find suitable loop exit branch");
- BasicBlock *B_ExitBlock = A_ExitInsn->getSuccessor(1);
- BasicBlock *B_Header = BLoop->getHeader();
- if (ALoop->contains(B_ExitBlock)) {
- B_ExitBlock = A_ExitInsn->getSuccessor(0);
- A_ExitInsn->setSuccessor(0, B_Header);
- } else
- A_ExitInsn->setSuccessor(1, B_Header);
-
- // [*] Update ALoop's exit value using new exit value.
- ExitCondition->setOperand(EVOpNum, AEV);
-
- // [*] Update BLoop's header phi nodes. Remove incoming PHINode's from
- // original loop's preheader. Add incoming PHINode values from
- // ALoop's exiting block. Update BLoop header's dominator info.
-
- // Collect inverse map of Header PHINodes.
- DenseMap<Value *, Value *> InverseMap;
- for (BasicBlock::iterator BI = ALoop->getHeader()->begin(),
- BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
- if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- PHINode *PNClone = cast<PHINode>(VMap[PN]);
- InverseMap[PNClone] = PN;
- } else
- break;
- }
-
- BasicBlock *A_Preheader = ALoop->getLoopPreheader();
- for (BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
- BI != BE; ++BI) {
- if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- // Remove incoming value from original preheader.
- PN->removeIncomingValue(A_Preheader);
-
- // Add incoming value from A_ExitingBlock.
- if (PN == B_IndVar)
- PN->addIncoming(BSV, A_ExitingBlock);
- else {
- PHINode *OrigPN = cast<PHINode>(InverseMap[PN]);
- Value *V2 = NULL;
- // If loop header is also loop exiting block then
- // OrigPN is incoming value for B loop header.
- if (A_ExitingBlock == ALoop->getHeader())
- V2 = OrigPN;
- else
- V2 = OrigPN->getIncomingValueForBlock(A_ExitingBlock);
- PN->addIncoming(V2, A_ExitingBlock);
- }
- } else
- break;
- }
-
- DT->changeImmediateDominator(B_Header, A_ExitingBlock);
- DF->changeImmediateDominator(B_Header, A_ExitingBlock, DT);
-
- // [*] Update BLoop's exit block. Its new predecessor is BLoop's exiting
- // block. Remove incoming PHINode values from ALoop's exiting block and
- // add new incoming values from BLoop's exiting block.
- // Update BLoop exit block's dominator info.
- BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]);
- for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
- BI != BE; ++BI) {
- if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
- B_ExitingBlock);
- PN->removeIncomingValue(A_ExitingBlock);
- } else
- break;
- }
-
- DT->changeImmediateDominator(B_ExitBlock, B_ExitingBlock);
- DF->changeImmediateDominator(B_ExitBlock, B_ExitingBlock, DT);
-
- //[*] Split ALoop's exit edge. This creates a new block which
- // serves two purposes. The first is to hold PHINode definitions
- // that preserve ALoop's LCSSA form. The second is to act
- // as a preheader for BLoop.
- BasicBlock *A_ExitBlock = SplitEdge(A_ExitingBlock, B_Header, this);
-
- //[*] Preserve ALoop's LCSSA form. Create new forwarding PHINodes
- // in A_ExitBlock to redefine outgoing PHI definitions from ALoop.
- for(BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
- BI != BE; ++BI) {
- if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- Value *V1 = PN->getIncomingValueForBlock(A_ExitBlock);
- PHINode *newPHI = PHINode::Create(PN->getType(), PN->getName());
- newPHI->addIncoming(V1, A_ExitingBlock);
- A_ExitBlock->getInstList().push_front(newPHI);
- PN->removeIncomingValue(A_ExitBlock);
- PN->addIncoming(newPHI, A_ExitBlock);
- } else
- break;
- }
-
- //[*] Eliminate split condition's inactive branch from ALoop.
- BasicBlock *A_SplitCondBlock = SplitCondition->getParent();
- BranchInst *A_BR = cast<BranchInst>(A_SplitCondBlock->getTerminator());
- BasicBlock *A_InactiveBranch = NULL;
- BasicBlock *A_ActiveBranch = NULL;
- A_ActiveBranch = A_BR->getSuccessor(0);
- A_InactiveBranch = A_BR->getSuccessor(1);
- A_BR->setUnconditionalDest(A_ActiveBranch);
- removeBlocks(A_InactiveBranch, L, A_ActiveBranch);
-
- //[*] Eliminate split condition's inactive branch from BLoop.
- BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]);
- BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
- BasicBlock *B_InactiveBranch = NULL;
- BasicBlock *B_ActiveBranch = NULL;
- B_ActiveBranch = B_BR->getSuccessor(1);
- B_InactiveBranch = B_BR->getSuccessor(0);
- B_BR->setUnconditionalDest(B_ActiveBranch);
- removeBlocks(B_InactiveBranch, BLoop, B_ActiveBranch);
-
- BasicBlock *A_Header = ALoop->getHeader();
- if (A_ExitingBlock == A_Header)
- return true;
-
- //[*] Move exit condition into split condition block to avoid
- // executing dead loop iteration.
- ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]);
- Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]);
- ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]);
-
- moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
- cast<ICmpInst>(SplitCondition), IndVar, IVIncrement,
- ALoop, EVOpNum);
-
- moveExitCondition(B_SplitCondBlock, B_ActiveBranch,
- B_ExitBlock, B_ExitCondition,
- B_SplitCondition, B_IndVar, B_IndVarIncrement,
- BLoop, EVOpNum);
-
- ++NumIndexSplit;
- return true;
-}
-
-/// cleanBlock - A block is considered clean if all non-terminal instructions
-/// are either PHINodes or IV based values.
-bool LoopIndexSplit::cleanBlock(BasicBlock *BB) {
- Instruction *Terminator = BB->getTerminator();
- for(BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE; ++BI) {
- Instruction *I = BI;
-
- if (isa<PHINode>(I) || I == Terminator || I == ExitCondition
- || I == SplitCondition || IVBasedValues.count(I)
- || isa<DbgInfoIntrinsic>(I))
- continue;
-
- if (I->mayHaveSideEffects())
- return false;
-
- // If I is used only inside this block then it is OK.
- bool usedOutsideBB = false;
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI) {
- Instruction *U = cast<Instruction>(*UI);
- if (U->getParent() != BB)
- usedOutsideBB = true;
- }
- if (!usedOutsideBB)
- continue;
-
- // Otherwise we have an instruction that may not allow loop splitting.
- return false;
- }
- return true;
-}
-
-/// IVisLT - If Op is comparing an IV based value with a loop invariant and
-/// IV based value is less than the loop invariant then return the loop
-/// invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisLT(ICmpInst &Op) {
- ICmpInst::Predicate P = Op.getPredicate();
- if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT)
- && IVBasedValues.count(Op.getOperand(0))
- && L->isLoopInvariant(Op.getOperand(1)))
- return Op.getOperand(1);
-
- if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT)
- && IVBasedValues.count(Op.getOperand(1))
- && L->isLoopInvariant(Op.getOperand(0)))
- return Op.getOperand(0);
-
- return NULL;
-}
-
-/// IVisLE - If Op is comparing an IV based value with a loop invariant and
-/// IV based value is less than or equal to the loop invariant then
-/// return the loop invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisLE(ICmpInst &Op) {
- ICmpInst::Predicate P = Op.getPredicate();
- if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE)
- && IVBasedValues.count(Op.getOperand(0))
- && L->isLoopInvariant(Op.getOperand(1)))
- return Op.getOperand(1);
-
- if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE)
- && IVBasedValues.count(Op.getOperand(1))
- && L->isLoopInvariant(Op.getOperand(0)))
- return Op.getOperand(0);
-
- return NULL;
-}
-
-/// IVisGT - If Op is comparing an IV based value with a loop invariant and
-/// IV based value is greater than the loop invariant then return the loop
-/// invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisGT(ICmpInst &Op) {
- ICmpInst::Predicate P = Op.getPredicate();
- if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT)
- && IVBasedValues.count(Op.getOperand(0))
- && L->isLoopInvariant(Op.getOperand(1)))
- return Op.getOperand(1);
-
- if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT)
- && IVBasedValues.count(Op.getOperand(1))
- && L->isLoopInvariant(Op.getOperand(0)))
- return Op.getOperand(0);
-
- return NULL;
-}
-
-/// IVisGE - If Op is comparing an IV based value with a loop invariant and
-/// IV based value is greater than or equal to the loop invariant then
-/// return the loop invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisGE(ICmpInst &Op) {
- ICmpInst::Predicate P = Op.getPredicate();
- if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE)
- && IVBasedValues.count(Op.getOperand(0))
- && L->isLoopInvariant(Op.getOperand(1)))
- return Op.getOperand(1);
-
- if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE)
- && IVBasedValues.count(Op.getOperand(1))
- && L->isLoopInvariant(Op.getOperand(0)))
- return Op.getOperand(0);
-
- return NULL;
-}
-
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
new file mode 100644
index 0000000..af25c5c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -0,0 +1,170 @@
+//===- LoopInstSimplify.cpp - Loop Instruction Simplification Pass --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs lightweight instruction simplification on loop bodies.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-instsimplify"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions simplified");
+
+namespace {
+ class LoopInstSimplify : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopInstSimplify() : LoopPass(ID) {
+ initializeLoopInstSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop*, LPPassManager&);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved("scalar-evolution");
+ }
+ };
+}
+
+char LoopInstSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
+ "Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopInstSimplify, "loop-instsimplify",
+ "Simplify instructions in loops", false, false)
+
+Pass *llvm::createLoopInstSimplifyPass() {
+ return new LoopInstSimplify();
+}
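+
+// Usage sketch (assumed, not part of this patch): given the registration
+// above, the pass can be exercised through its registered name, e.g.
+//
+//   opt -loop-instsimplify input.bc -o output.bc
+//
+// or constructed directly with createLoopInstSimplifyPass() and added to a
+// pass manager.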
+
+bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+ array_pod_sort(ExitBlocks.begin(), ExitBlocks.end());
+
+ SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+
+ // The bit we are stealing from the pointer represents whether this basic
+ // block is the header of a subloop, in which case we only process its phis.
+ typedef PointerIntPair<BasicBlock*, 1> WorklistItem;
+ SmallVector<WorklistItem, 16> VisitStack;
+ SmallPtrSet<BasicBlock*, 32> Visited;
+
+ bool Changed = false;
+ bool LocalChanged;
+ do {
+ LocalChanged = false;
+
+ VisitStack.clear();
+ Visited.clear();
+
+ VisitStack.push_back(WorklistItem(L->getHeader(), false));
+
+ while (!VisitStack.empty()) {
+ WorklistItem Item = VisitStack.pop_back_val();
+ BasicBlock *BB = Item.getPointer();
+ bool IsSubloopHeader = Item.getInt();
+
+ // Simplify instructions in the current basic block.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ Instruction *I = BI++;
+
+ // The first time through the loop ToSimplify is empty and we try to
+ // simplify all instructions. On later iterations ToSimplify is not
+ // empty and we only bother simplifying instructions that are in it.
+ if (!ToSimplify->empty() && !ToSimplify->count(I))
+ continue;
+
+ // Don't bother simplifying unused instructions.
+ if (!I->use_empty()) {
+ Value *V = SimplifyInstruction(I, TD, DT);
+ if (V && LI->replacementPreservesLCSSAForm(I, V)) {
+ // Mark all uses for resimplification next time round the loop.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Next->insert(cast<Instruction>(*UI));
+
+ I->replaceAllUsesWith(V);
+ LocalChanged = true;
+ ++NumSimplified;
+ }
+ }
+ LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I);
+
+ if (IsSubloopHeader && !isa<PHINode>(I))
+ break;
+ }
+
+ // Add all successors to the worklist, except for loop exit blocks and the
+ // bodies of subloops. We visit the headers of subloops so that we can process
+ // their phis, but we contract the rest of the subloop body and only follow
+ // edges leading back to the original loop.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+ ++SI) {
+ BasicBlock *SuccBB = *SI;
+ if (!Visited.insert(SuccBB))
+ continue;
+
+ const Loop *SuccLoop = LI->getLoopFor(SuccBB);
+ if (SuccLoop && SuccLoop->getHeader() == SuccBB
+ && L->contains(SuccLoop)) {
+ VisitStack.push_back(WorklistItem(SuccBB, true));
+
+ SmallVector<BasicBlock*, 8> SubLoopExitBlocks;
+ SuccLoop->getExitBlocks(SubLoopExitBlocks);
+
+ for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) {
+ BasicBlock *ExitBB = SubLoopExitBlocks[i];
+ if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB))
+ VisitStack.push_back(WorklistItem(ExitBB, false));
+ }
+
+ continue;
+ }
+
+ bool IsExitBlock = std::binary_search(ExitBlocks.begin(),
+ ExitBlocks.end(), SuccBB);
+ if (IsExitBlock)
+ continue;
+
+ VisitStack.push_back(WorklistItem(SuccBB, false));
+ }
+ }
+
+ // Place the list of instructions to simplify on the next loop iteration
+ // into ToSimplify.
+ std::swap(ToSimplify, Next);
+ Next->clear();
+
+ Changed |= LocalChanged;
+ } while (LocalChanged);
+
+ return Changed;
+}
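+
+// A minimal example of what one round of the worklist catches (IR names
+// hypothetical): given, inside the loop body,
+//
+//   %a = add i32 %x, 0
+//   %b = mul i32 %a, 1
+//
+// SimplifyInstruction folds %a to %x; %b, as a user of %a, is placed in
+// Next, so the following round folds %b to %x as well, and the dead
+// instructions are cleaned up by RecursivelyDeleteTriviallyDeadInstructions.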
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 65acc1d..95e1578 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -15,16 +15,16 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
#define MAX_HEADER_SIZE 16
@@ -35,16 +35,13 @@ namespace {
class LoopRotate : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopRotate() : LoopPass(ID) {}
-
- // Rotate Loop L as many times as possible. Return true if
- // loop is rotated at least once.
- bool runOnLoop(Loop *L, LPPassManager &LPM);
+ LoopRotate() : LoopPass(ID) {
+ initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+ }
// LCSSA form makes instruction renaming easier.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
@@ -54,79 +51,119 @@ namespace {
AU.addPreserved<ScalarEvolution>();
}
- // Helper functions
-
- /// Do actual work
- bool rotateLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool rotateLoop(Loop *L);
- /// Initialize local data
- void initialize();
-
- /// After loop rotation, loop pre-header has multiple successors.
- /// Insert one forwarding basic block to ensure that loop pre-header
- /// has only one successor.
- void preserveCanonicalLoopForm(LPPassManager &LPM);
-
private:
- Loop *L;
- BasicBlock *OrigHeader;
- BasicBlock *OrigPreHeader;
- BasicBlock *OrigLatch;
- BasicBlock *NewHeader;
- BasicBlock *Exit;
- LPPassManager *LPM_Ptr;
+ LoopInfo *LI;
};
}
char LoopRotate::ID = 0;
-INITIALIZE_PASS(LoopRotate, "loop-rotate", "Rotate Loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
/// Rotate Loop L as many times as possible. Return true if
/// the loop is rotated at least once.
-bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) {
-
- bool RotatedOneLoop = false;
- initialize();
- LPM_Ptr = &LPM;
+bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
+ LI = &getAnalysis<LoopInfo>();
// One loop can be rotated multiple times.
- while (rotateLoop(Lp,LPM)) {
- RotatedOneLoop = true;
- initialize();
- }
+ bool MadeChange = false;
+ while (rotateLoop(L))
+ MadeChange = true;
- return RotatedOneLoop;
+ return MadeChange;
}
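+
+// A sketch of the rotation itself (C-level, names illustrative): a
+// top-tested loop
+//
+//   while (cond()) { body(); }
+//
+// is rewritten as a guarded bottom-tested loop
+//
+//   if (cond()) do { body(); } while (cond());
+//
+// by cloning the header's test into the preheader; each successful call to
+// rotateLoop() below performs one such rotation.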
-/// Rotate loop LP. Return true if the loop is rotated.
-bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
- L = Lp;
-
- OrigPreHeader = L->getLoopPreheader();
- if (!OrigPreHeader) return false;
-
- OrigLatch = L->getLoopLatch();
- if (!OrigLatch) return false;
+/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
+/// old header into the preheader. If there were uses of the values produced by
+/// these instructions that were outside of the loop, we have to insert PHI nodes
+/// to merge the two values. Do this now.
+static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
+ BasicBlock *OrigPreheader,
+ ValueToValueMapTy &ValueMap) {
+ // Remove PHI node entries that are no longer live.
+ BasicBlock::iterator I, E = OrigHeader->end();
+ for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+
+ // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
+ // as necessary.
+ SSAUpdater SSA;
+ for (I = OrigHeader->begin(); I != E; ++I) {
+ Value *OrigHeaderVal = I;
+
+ // If there are no uses of the value (e.g. because it returns void), there
+ // is nothing to rewrite.
+ if (OrigHeaderVal->use_empty())
+ continue;
+
+ Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
- OrigHeader = L->getHeader();
+    // The value now exists in two versions: the initial value in the preheader
+ // and the loop "next" value in the original header.
+ SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
+ SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
+ SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
+
+ // Visit each use of the OrigHeader instruction.
+ for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
+ UE = OrigHeaderVal->use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+
+ // SSAUpdater can't handle a non-PHI use in the same block as an
+ // earlier def. We can easily handle those cases manually.
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ if (!isa<PHINode>(UserInst)) {
+ BasicBlock *UserBB = UserInst->getParent();
+
+ // The original users in the OrigHeader are already using the
+ // original definitions.
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped.
+ if (UserBB == OrigPreheader) {
+ U = OrigPreHeaderVal;
+ continue;
+ }
+ }
+
+ // Anything else can be handled by SSAUpdater.
+ SSA.RewriteUse(U);
+ }
+ }
+}
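
For reference, a minimal sketch of the SSAUpdater pattern the new helper relies on (the helper name and setting are hypothetical; it assumes the same LLVM 2.9-era API the patch itself uses):

#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

// Rewrite every use of OldDef now that its value has two reaching
// definitions, one per block. SSAUpdater inserts PHI nodes at join
// points on demand as each use is rewritten.
static void rewriteDualDef(Instruction *OldDef,
                           BasicBlock *BlockA, Value *ValA,
                           BasicBlock *BlockB, Value *ValB) {
  SSAUpdater SSA;
  SSA.Initialize(OldDef->getType(), OldDef->getName());
  SSA.AddAvailableValue(BlockA, ValA);
  SSA.AddAvailableValue(BlockB, ValB);
  for (Value::use_iterator UI = OldDef->use_begin(), UE = OldDef->use_end();
       UI != UE; ) {
    Use &U = UI.getUse();
    ++UI;             // advance first; RewriteUse may unlink this use
    SSA.RewriteUse(U);
  }
}
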
+/// Rotate loop LP. Return true if the loop is rotated.
+bool LoopRotate::rotateLoop(Loop *L) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
-
+
+ BasicBlock *OrigHeader = L->getHeader();
+
+ BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+ if (BI == 0 || BI->isUnconditional())
+ return false;
+
// If the loop header is not one of the loop exiting blocks then
// either this loop is already rotated or it is not
// suitable for loop rotation transformations.
if (!L->isLoopExiting(OrigHeader))
return false;
- BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
- if (!BI)
- return false;
- assert(BI->isConditional() && "Branch Instruction is not conditional");
-
// Updating PHInodes in loops with multiple exits adds complexity.
// Keep it simple, and restrict loop rotation to loops with one exit only.
  // In the future, lift this restriction and support multiple exits if
@@ -136,24 +173,18 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
if (ExitBlocks.size() > 1)
return false;
- // Check size of original header and reject
- // loop if it is very big.
- unsigned Size = 0;
-
- // FIXME: Use common api to estimate size.
- for (BasicBlock::const_iterator OI = OrigHeader->begin(),
- OE = OrigHeader->end(); OI != OE; ++OI) {
- if (isa<PHINode>(OI))
- continue; // PHI nodes don't count.
- if (isa<DbgInfoIntrinsic>(OI))
- continue; // Debug intrinsics don't count as size.
- ++Size;
+ // Check size of original header and reject loop if it is very big.
+ {
+ CodeMetrics Metrics;
+ Metrics.analyzeBasicBlock(OrigHeader);
+ if (Metrics.NumInsts > MAX_HEADER_SIZE)
+ return false;
}
- if (Size > MAX_HEADER_SIZE)
- return false;
-
// Now, this loop is suitable for rotation.
+ BasicBlock *OrigPreheader = L->getLoopPreheader();
+ BasicBlock *OrigLatch = L->getLoopLatch();
+ assert(OrigPreheader && OrigLatch && "Loop not in canonical form?");
// Anything ScalarEvolution may know about this loop or the PHI nodes
// in its header will soon be invalidated.
@@ -163,8 +194,8 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
  // Find the new loop header. NewHeader is the header's one and only
  // successor that is inside the loop. The header's other successor is
  // outside the loop; otherwise the loop is not suitable for rotation.
- Exit = BI->getSuccessor(0);
- NewHeader = BI->getSuccessor(1);
+ BasicBlock *Exit = BI->getSuccessor(0);
+ BasicBlock *NewHeader = BI->getSuccessor(1);
if (L->contains(Exit))
std::swap(Exit, NewHeader);
assert(NewHeader && "Unable to determine new loop header");
@@ -180,20 +211,54 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// Begin by walking OrigHeader and populating ValueMap with an entry for
// each Instruction.
BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
- DenseMap<const Value *, Value *> ValueMap;
+ ValueToValueMapTy ValueMap;
// For PHI nodes, the value available in OldPreHeader is just the
// incoming value from OldPreHeader.
for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
- ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
+ ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
- // For the rest of the instructions, create a clone in the OldPreHeader.
- TerminatorInst *LoopEntryBranch = OrigPreHeader->getTerminator();
- for (; I != E; ++I) {
- Instruction *C = I->clone();
- C->setName(I->getName());
- C->insertBefore(LoopEntryBranch);
- ValueMap[I] = C;
+ // For the rest of the instructions, either hoist to the OrigPreheader if
+  // possible or create a clone in the OrigPreheader if not.
+ TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
+ while (I != E) {
+ Instruction *Inst = I++;
+
+ // If the instruction's operands are invariant and it doesn't read or write
+ // memory, then it is safe to hoist. Doing this doesn't change the order of
+ // execution in the preheader, but does prevent the instruction from
+ // executing in each iteration of the loop. This means it is safe to hoist
+ // something that might trap, but isn't safe to hoist something that reads
+ // memory (without proving that the loop doesn't write).
+ if (L->hasLoopInvariantOperands(Inst) &&
+ !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
+ !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) {
+ Inst->moveBefore(LoopEntryBranch);
+ continue;
+ }
+
+ // Otherwise, create a duplicate of the instruction.
+ Instruction *C = Inst->clone();
+
+ // Eagerly remap the operands of the instruction.
+ RemapInstruction(C, ValueMap,
+ RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+
+ // With the operands remapped, see if the instruction constant folds or is
+    // otherwise simplifiable. This commonly occurs because the entry from PHI
+ // nodes allows icmps and other instructions to fold.
+ Value *V = SimplifyInstruction(C);
+ if (V && LI->replacementPreservesLCSSAForm(C, V)) {
+ // If so, then delete the temporary instruction and stick the folded value
+ // in the map.
+ delete C;
+ ValueMap[Inst] = V;
+ } else {
+ // Otherwise, stick the new instruction into the new block!
+ C->setName(Inst->getName());
+ C->insertBefore(LoopEntryBranch);
+ ValueMap[Inst] = C;
+ }
}
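
Restated as a standalone predicate (the helper name is hypothetical), the hoist-vs-clone test above is:

static bool canHoistIntoPreheader(Loop *L, Instruction *I) {
  // Safe to move (rather than copy) into the preheader: the operands are
  // loop invariant and the instruction touches no memory, so running it
  // once before the loop cannot change what it computes or observes.
  // Trapping is fine, since the preheader runs exactly when the loop is
  // entered.
  return L->hasLoopInvariantOperands(I) &&
         !I->mayReadFromMemory() && !I->mayWriteToMemory() &&
         !isa<TerminatorInst>(I) && !isa<DbgInfoIntrinsic>(I);
}
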
// Along with all the other instructions, we just cloned OrigHeader's
@@ -203,221 +268,81 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
- PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreHeader);
+ PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
// Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
// OrigPreHeader's old terminator (the original branch into the loop), and
// remove the corresponding incoming values from the PHI nodes in OrigHeader.
LoopEntryBranch->eraseFromParent();
- for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
- PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
- // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
- // as necessary.
- SSAUpdater SSA;
- for (I = OrigHeader->begin(); I != E; ++I) {
- Value *OrigHeaderVal = I;
- Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
-
-    // The value now exists in two versions: the initial value in the preheader
- // and the loop "next" value in the original header.
- SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
- SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
- SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal);
-
- // Visit each use of the OrigHeader instruction.
- for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
- UE = OrigHeaderVal->use_end(); UI != UE; ) {
- // Grab the use before incrementing the iterator.
- Use &U = UI.getUse();
-
- // Increment the iterator before removing the use from the list.
- ++UI;
-
- // SSAUpdater can't handle a non-PHI use in the same block as an
- // earlier def. We can easily handle those cases manually.
- Instruction *UserInst = cast<Instruction>(U.getUser());
- if (!isa<PHINode>(UserInst)) {
- BasicBlock *UserBB = UserInst->getParent();
-
- // The original users in the OrigHeader are already using the
- // original definitions.
- if (UserBB == OrigHeader)
- continue;
-
- // Users in the OrigPreHeader need to use the value to which the
- // original definitions are mapped.
- if (UserBB == OrigPreHeader) {
- U = OrigPreHeaderVal;
- continue;
- }
- }
-
- // Anything else can be handled by SSAUpdater.
- SSA.RewriteUse(U);
- }
- }
+ // If there were any uses of instructions in the duplicated block outside the
+  // loop, update them, inserting PHI nodes as required.
+ RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);
// NewHeader is now the header of the loop.
L->moveToHeader(NewHeader);
+ assert(L->getHeader() == NewHeader && "Latch block is our new header");
- // Move the original header to the bottom of the loop, where it now more
- // naturally belongs. This isn't necessary for correctness, and CodeGen can
- // usually reorder blocks on its own to fix things like this up, but it's
- // still nice to keep the IR readable.
- //
- // The original header should have only one predecessor at this point, since
- // we checked that the loop had a proper preheader and unique backedge before
- // we started.
- assert(OrigHeader->getSinglePredecessor() &&
- "Original loop header has too many predecessors after loop rotation!");
- OrigHeader->moveAfter(OrigHeader->getSinglePredecessor());
-
- // Also, since this original header only has one predecessor, zap its
- // PHI nodes, which are now trivial.
- FoldSingleEntryPHINodes(OrigHeader);
-
- // TODO: We could just go ahead and merge OrigHeader into its predecessor
- // at this point, if we don't mind updating dominator info.
-
- // Establish a new preheader, update dominators, etc.
- preserveCanonicalLoopForm(LPM);
-
- ++NumRotated;
- return true;
-}
-
-/// Initialize local data
-void LoopRotate::initialize() {
- L = NULL;
- OrigHeader = NULL;
- OrigPreHeader = NULL;
- NewHeader = NULL;
- Exit = NULL;
-}
-
-/// After loop rotation, loop pre-header has multiple successors.
-/// Insert one forwarding basic block to ensure that loop pre-header
-/// has only one successor.
-void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
-
- // Right now original pre-header has two successors, new header and
- // exit block. Insert new block between original pre-header and
- // new header such that loop's new pre-header has only one successor.
- BasicBlock *NewPreHeader = BasicBlock::Create(OrigHeader->getContext(),
- "bb.nph",
- OrigHeader->getParent(),
- NewHeader);
- LoopInfo &LI = getAnalysis<LoopInfo>();
- if (Loop *PL = LI.getLoopFor(OrigPreHeader))
- PL->addBasicBlockToLoop(NewPreHeader, LI.getBase());
- BranchInst::Create(NewHeader, NewPreHeader);
- BranchInst *OrigPH_BI = cast<BranchInst>(OrigPreHeader->getTerminator());
- if (OrigPH_BI->getSuccessor(0) == NewHeader)
- OrigPH_BI->setSuccessor(0, NewPreHeader);
- else {
- assert(OrigPH_BI->getSuccessor(1) == NewHeader &&
- "Unexpected original pre-header terminator");
- OrigPH_BI->setSuccessor(1, NewPreHeader);
- }
-
- PHINode *PN;
- for (BasicBlock::iterator I = NewHeader->begin();
- (PN = dyn_cast<PHINode>(I)); ++I) {
- int index = PN->getBasicBlockIndex(OrigPreHeader);
- assert(index != -1 && "Expected incoming value from Original PreHeader");
- PN->setIncomingBlock(index, NewPreHeader);
- assert(PN->getBasicBlockIndex(OrigPreHeader) == -1 &&
- "Expected only one incoming value from Original PreHeader");
- }
-
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
- DT->addNewBlock(NewPreHeader, OrigPreHeader);
- DT->changeImmediateDominator(L->getHeader(), NewPreHeader);
- DT->changeImmediateDominator(Exit, OrigPreHeader);
- for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
- BI != BE; ++BI) {
- BasicBlock *B = *BI;
- if (L->getHeader() != B) {
- DomTreeNode *Node = DT->getNode(B);
- if (Node && Node->getBlock() == OrigHeader)
- DT->changeImmediateDominator(*BI, L->getHeader());
- }
- }
- DT->changeImmediateDominator(OrigHeader, OrigLatch);
- }
-
- if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>()) {
- // New Preheader's dominance frontier is Exit block.
- DominanceFrontier::DomSetType NewPHSet;
- NewPHSet.insert(Exit);
- DF->addBasicBlock(NewPreHeader, NewPHSet);
-
- // New Header's dominance frontier now includes itself and Exit block
- DominanceFrontier::iterator HeadI = DF->find(L->getHeader());
- if (HeadI != DF->end()) {
- DominanceFrontier::DomSetType & HeaderSet = HeadI->second;
- HeaderSet.clear();
- HeaderSet.insert(L->getHeader());
- HeaderSet.insert(Exit);
- } else {
- DominanceFrontier::DomSetType HeaderSet;
- HeaderSet.insert(L->getHeader());
- HeaderSet.insert(Exit);
- DF->addBasicBlock(L->getHeader(), HeaderSet);
- }
-
- // Original header (new Loop Latch)'s dominance frontier is Exit.
- DominanceFrontier::iterator LatchI = DF->find(L->getLoopLatch());
- if (LatchI != DF->end()) {
- DominanceFrontier::DomSetType &LatchSet = LatchI->second;
- LatchSet = LatchI->second;
- LatchSet.clear();
- LatchSet.insert(Exit);
- } else {
- DominanceFrontier::DomSetType LatchSet;
- LatchSet.insert(Exit);
- DF->addBasicBlock(L->getHeader(), LatchSet);
+ // At this point, we've finished our major CFG changes. As part of cloning
+ // the loop into the preheader we've simplified instructions and the
+ // duplicated conditional branch may now be branching on a constant. If it is
+ // branching on a constant and if that constant means that we enter the loop,
+ // then we fold away the cond branch to an uncond branch. This simplifies the
+ // loop in cases important for nested loops, and it also means we don't have
+ // to split as many edges.
+ BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
+ assert(PHBI->isConditional() && "Should be clone of BI condbr!");
+ if (!isa<ConstantInt>(PHBI->getCondition()) ||
+ PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero())
+ != NewHeader) {
+ // The conditional branch can't be folded, handle the general case.
+ // Update DominatorTree to reflect the CFG change we just made. Then split
+ // edges as necessary to preserve LoopSimplify form.
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ // Since OrigPreheader now has the conditional branch to Exit block, it is
+ // the dominator of Exit.
+ DT->changeImmediateDominator(Exit, OrigPreheader);
+ DT->changeImmediateDominator(NewHeader, OrigPreheader);
+
+ // Update OrigHeader to be dominated by the new header block.
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
}
-
- // If a loop block dominates new loop latch then add to its frontiers
- // new header and Exit and remove new latch (which is equal to original
- // header).
- BasicBlock *NewLatch = L->getLoopLatch();
-
- assert(NewLatch == OrigHeader && "NewLatch is inequal to OrigHeader");
-
+
+    // Right now OrigPreheader has two successors, NewHeader and the Exit block, and
+ // thus is not a preheader anymore. Split the edge to form a real preheader.
+ BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
+ NewPH->setName(NewHeader->getName() + ".lr.ph");
+
+ // Preserve canonical loop form, which means that 'Exit' should have only one
+ // predecessor.
+ BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
+ ExitSplit->moveBefore(Exit);
+ } else {
+ // We can fold the conditional branch in the preheader, this makes things
+ // simpler. The first step is to remove the extra edge to the Exit block.
+ Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
+ BranchInst::Create(NewHeader, PHBI);
+ PHBI->eraseFromParent();
+
+ // With our CFG finalized, update DomTree if it is available.
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
- for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
- BI != BE; ++BI) {
- BasicBlock *B = *BI;
- if (DT->dominates(B, NewLatch)) {
- DominanceFrontier::iterator BDFI = DF->find(B);
- if (BDFI != DF->end()) {
- DominanceFrontier::DomSetType &BSet = BDFI->second;
- BSet.erase(NewLatch);
- BSet.insert(L->getHeader());
- BSet.insert(Exit);
- } else {
- DominanceFrontier::DomSetType BSet;
- BSet.insert(L->getHeader());
- BSet.insert(Exit);
- DF->addBasicBlock(B, BSet);
- }
- }
- }
+ // Update OrigHeader to be dominated by the new header block.
+ DT->changeImmediateDominator(NewHeader, OrigPreheader);
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
}
}
-
- // Preserve canonical loop form, which means Exit block should
- // have only one predecessor.
- SplitEdge(L->getLoopLatch(), Exit, this);
-
- assert(NewHeader && L->getHeader() == NewHeader &&
- "Invalid loop header after loop rotation");
- assert(NewPreHeader && L->getLoopPreheader() == NewPreHeader &&
- "Invalid loop preheader after loop rotation");
- assert(L->getLoopLatch() &&
- "Invalid loop latch after loop rotation");
+
+ assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
+ assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+
+ // Now that the CFG and DomTree are in a consistent state again, try to merge
+ // the OrigHeader block into OrigLatch. This will succeed if they are
+ // connected by an unconditional branch. This is just a cleanup so the
+ // emitted code isn't too gross in this common case.
+ MergeBlockIntoPredecessor(OrigHeader, this);
+
+ ++NumRotated;
+ return true;
}
+
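
The net effect of the pass is easiest to see at the source level; a sketch (C++, not the pass's actual LLVM IR input):

void body(int i); // stand-in for the loop body

// Before rotation: top-tested loop, the exit test lives in the header.
void beforeRotation(int n) {
  for (int i = 0; i < n; ++i)
    body(i);
}

// After rotation: the header's test is cloned into the preheader as a
// guard, and the loop becomes bottom-tested, so the exit test now sits
// in the latch and the loop body dominates the backedge.
void afterRotation(int n) {
  if (0 < n) {          // cloned test (guards entry into the loop)
    int i = 0;
    do {
      body(i);
      ++i;
    } while (i < n);    // original test, now in the new latch
  }
}
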
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index e8dc5d3..ac4aea2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -63,6 +63,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -113,7 +114,7 @@ class RegUseTracker {
public:
void CountRegister(const SCEV *Reg, size_t LUIdx);
void DropRegister(const SCEV *Reg, size_t LUIdx);
- void DropUse(size_t LUIdx);
+ void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
@@ -152,11 +153,19 @@ RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
}
void
-RegUseTracker::DropUse(size_t LUIdx) {
- // Remove the use index from every register's use list.
+RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
+ assert(LUIdx <= LastLUIdx);
+
+ // Update RegUses. The data structure is not optimized for this purpose;
+ // we must iterate through it and update each of the bit vectors.
for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
- I != E; ++I)
- I->second.UsedByIndices.reset(LUIdx);
+ I != E; ++I) {
+ SmallBitVector &UsedByIndices = I->second.UsedByIndices;
+ if (LUIdx < UsedByIndices.size())
+ UsedByIndices[LUIdx] =
+ LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
+ UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
+ }
}
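
The same swap-and-shrink update on a plain std::vector<bool>, for illustration (assumes LUIdx <= LastLUIdx, as the assert above requires):

#include <algorithm>
#include <vector>

// Move the bit recorded for the last use index into the slot being
// deleted, then drop the tail -- one bit vector's worth of
// RegUseTracker::SwapAndDropUse.
static void swapAndDropBit(std::vector<bool> &UsedByIndices,
                           size_t LUIdx, size_t LastLUIdx) {
  if (LUIdx < UsedByIndices.size())
    UsedByIndices[LUIdx] =
        LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
  UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
}
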
bool
@@ -202,8 +211,7 @@ struct Formula {
Formula() : ScaledReg(0) {}
- void InitialMatch(const SCEV *S, Loop *L,
- ScalarEvolution &SE, DominatorTree &DT);
+ void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
unsigned getNumRegs() const;
const Type *getType() const;
@@ -224,9 +232,9 @@ struct Formula {
static void DoInitialMatch(const SCEV *S, Loop *L,
SmallVectorImpl<const SCEV *> &Good,
SmallVectorImpl<const SCEV *> &Bad,
- ScalarEvolution &SE, DominatorTree &DT) {
+ ScalarEvolution &SE) {
// Collect expressions which properly dominate the loop header.
- if (S->properlyDominates(L->getHeader(), &DT)) {
+ if (SE.properlyDominates(S, L->getHeader())) {
Good.push_back(S);
return;
}
@@ -235,18 +243,18 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
- DoInitialMatch(*I, L, Good, Bad, SE, DT);
+ DoInitialMatch(*I, L, Good, Bad, SE);
return;
}
// Look at addrec operands.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
if (!AR->getStart()->isZero()) {
- DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT);
+ DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
AR->getLoop()),
- L, Good, Bad, SE, DT);
+ L, Good, Bad, SE);
return;
}
@@ -258,7 +266,7 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
SmallVector<const SCEV *, 4> MyGood;
SmallVector<const SCEV *, 4> MyBad;
- DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT);
+ DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
SE.getEffectiveSCEVType(NewMul->getType())));
for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
@@ -278,11 +286,10 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
/// attempting to keep all loop-invariant and loop-computable values in a
/// single base register.
-void Formula::InitialMatch(const SCEV *S, Loop *L,
- ScalarEvolution &SE, DominatorTree &DT) {
+void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
SmallVector<const SCEV *, 4> Good;
SmallVector<const SCEV *, 4> Bad;
- DoInitialMatch(S, L, Good, Bad, SE, DT);
+ DoInitialMatch(S, L, Good, Bad, SE);
if (!Good.empty()) {
const SCEV *Sum = SE.getAddExpr(Good);
if (!Sum->isZero())
@@ -608,7 +615,7 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
bool Changed = false;
while (!DeadInsts.empty()) {
- Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
+ Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val());
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
@@ -645,8 +652,6 @@ public:
: NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
SetupCost(0) {}
- unsigned getNumRegs() const { return NumRegs; }
-
bool operator<(const Cost &Other) const;
void Loose();
@@ -722,6 +727,9 @@ void Cost::RateRegister(const SCEV *Reg,
(isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
++SetupCost;
+
+ NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+ SE.hasComputableLoopEvolution(Reg, L);
}
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
@@ -756,9 +764,6 @@ void Cost::RateFormula(const Formula &F,
return;
}
RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
-
- NumIVMuls += isa<SCEVMulExpr>(BaseReg) &&
- BaseReg->hasComputableLoopEvolution(L);
}
if (F.BaseRegs.size() > 1)
@@ -1257,32 +1262,6 @@ struct UseMapDenseMapInfo {
}
};
-/// FormulaSorter - This class implements an ordering for formulae which sorts
-/// the by their standalone cost.
-class FormulaSorter {
- /// These two sets are kept empty, so that we compute standalone costs.
- DenseSet<const SCEV *> VisitedRegs;
- SmallPtrSet<const SCEV *, 16> Regs;
- Loop *L;
- LSRUse *LU;
- ScalarEvolution &SE;
- DominatorTree &DT;
-
-public:
- FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt)
- : L(l), LU(&lu), SE(se), DT(dt) {}
-
- bool operator()(const Formula &A, const Formula &B) {
- Cost CostA;
- CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
- Regs.clear();
- Cost CostB;
- CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
- Regs.clear();
- return CostA < CostB;
- }
-};
-
/// LSRInstance - This class holds state for the main loop strength reduction
/// logic.
class LSRInstance {
@@ -1341,7 +1320,7 @@ class LSRInstance {
LSRUse::KindType Kind,
const Type *AccessTy);
- void DeleteUse(LSRUse &LU);
+ void DeleteUse(LSRUse &LU, size_t LUIdx);
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
@@ -1925,10 +1904,13 @@ LSRInstance::getUse(const SCEV *&Expr,
}
/// DeleteUse - Delete the given use from the Uses list.
-void LSRInstance::DeleteUse(LSRUse &LU) {
+void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
if (&LU != &Uses.back())
std::swap(LU, Uses.back());
Uses.pop_back();
+
+ // Update RegUses.
+ RegUses.SwapAndDropUse(LUIdx, Uses.size());
}
 /// FindUseWithSimilarFormula - Look for a use distinct from OrigLU which has
@@ -2073,7 +2055,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (N->isLoopInvariant(L)) {
+ if (SE.isLoopInvariant(N, L)) {
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
}
@@ -2113,7 +2095,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
Formula F;
- F.InitialMatch(S, L, SE, DT);
+ F.InitialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}
@@ -2213,7 +2195,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
unsigned OtherIdx = !UI.getOperandNo();
Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
- if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L))
+ if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
continue;
}
@@ -2296,7 +2278,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Loop-variant "unknown" values are uninteresting; we won't be able to
// do anything meaningful with them.
- if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L))
+ if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
continue;
// Don't pull a constant into a register if the constant could be folded
@@ -2347,8 +2329,8 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
for (SmallVectorImpl<const SCEV *>::const_iterator
I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
const SCEV *BaseReg = *I;
- if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
- !BaseReg->hasComputableLoopEvolution(L))
+ if (SE.properlyDominates(BaseReg, L->getHeader()) &&
+ !SE.hasComputableLoopEvolution(BaseReg, L))
Ops.push_back(BaseReg);
else
F.BaseRegs.push_back(BaseReg);
@@ -2813,9 +2795,11 @@ LSRInstance::GenerateAllReuseFormulae() {
print_uses(dbgs()));
}
-/// If their are multiple formulae with the same set of registers used
+/// If there are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
+ DenseSet<const SCEV *> VisitedRegs;
+ SmallPtrSet<const SCEV *, 16> Regs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
@@ -2828,7 +2812,6 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
- FormulaSorter Sorter(L, LU, SE, DT);
DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
bool Any = false;
@@ -2854,7 +2837,14 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
BestFormulae.insert(std::make_pair(Key, FIdx));
if (!P.second) {
Formula &Best = LU.Formulae[P.first->second];
- if (Sorter.operator()(F, Best))
+
+ Cost CostF;
+ CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ Regs.clear();
+ Cost CostBest;
+ CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ Regs.clear();
+ if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
@@ -2894,7 +2884,7 @@ static const size_t ComplexityLimit = UINT16_MAX;
 /// this many solutions because it prunes the search space, but the pruning
/// isn't always sufficient.
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
- uint32_t Power = 1;
+ size_t Power = 1;
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
size_t FSize = I->Formulae.size();
@@ -3001,6 +2991,28 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+ // Update the relocs to reference the new use.
+ for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ LSRFixup &Fixup = *I;
+ if (Fixup.LUIdx == LUIdx) {
+ Fixup.LUIdx = LUThatHas - &Uses.front();
+ Fixup.Offset += F.AM.BaseOffs;
+ // Add the new offset to LUThatHas' offset list.
+ if (LUThatHas->Offsets.back() != Fixup.Offset) {
+ LUThatHas->Offsets.push_back(Fixup.Offset);
+ if (Fixup.Offset > LUThatHas->MaxOffset)
+ LUThatHas->MaxOffset = Fixup.Offset;
+ if (Fixup.Offset < LUThatHas->MinOffset)
+ LUThatHas->MinOffset = Fixup.Offset;
+ }
+ DEBUG(dbgs() << "New fixup has offset "
+ << Fixup.Offset << '\n');
+ }
+ if (Fixup.LUIdx == NumUses-1)
+ Fixup.LUIdx = LUIdx;
+ }
+
// Delete formulae from the new use which are no longer legal.
bool Any = false;
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
@@ -3019,22 +3031,8 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
if (Any)
LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
- // Update the relocs to reference the new use.
- for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
- E = Fixups.end(); I != E; ++I) {
- LSRFixup &Fixup = *I;
- if (Fixup.LUIdx == LUIdx) {
- Fixup.LUIdx = LUThatHas - &Uses.front();
- Fixup.Offset += F.AM.BaseOffs;
- DEBUG(dbgs() << "New fixup has offset "
- << Fixup.Offset << '\n');
- }
- if (Fixup.LUIdx == NumUses-1)
- Fixup.LUIdx = LUIdx;
- }
-
// Delete the old use.
- DeleteUse(LU);
+ DeleteUse(LU, LUIdx);
--LUIdx;
--NumUses;
break;
@@ -3546,21 +3544,23 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
// is the canonical backedge for this loop, which complicates post-inc
// users.
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
- !isa<IndirectBrInst>(BB->getTerminator()) &&
- (PN->getParent() != L->getHeader() || !L->contains(BB))) {
- // Split the critical edge.
- BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
-
- // If PN is outside of the loop and BB is in the loop, we want to
- // move the block to be immediately before the PHI block, not
- // immediately after BB.
- if (L->contains(BB) && !L->contains(PN))
- NewBB->moveBefore(PN->getParent());
-
- // Splitting the edge can reduce the number of PHI entries we have.
- e = PN->getNumIncomingValues();
- BB = NewBB;
- i = PN->getBasicBlockIndex(BB);
+ !isa<IndirectBrInst>(BB->getTerminator())) {
+ Loop *PNLoop = LI.getLoopFor(PN->getParent());
+ if (!PNLoop || PN->getParent() != PNLoop->getHeader()) {
+ // Split the critical edge.
+ BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+
+ // If PN is outside of the loop and BB is in the loop, we want to
+ // move the block to be immediately before the PHI block, not
+ // immediately after BB.
+ if (L->contains(BB) && !L->contains(PN))
+ NewBB->moveBefore(PN->getParent());
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
+ BB = NewBB;
+ i = PN->getBasicBlockIndex(BB);
+ }
}
std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
@@ -3792,21 +3792,30 @@ private:
}
char LoopStrengthReduce::ID = 0;
-INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce",
- "Loop Strength Reduction", false, false);
+INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false)
+
Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
}
LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
- : LoopPass(ID), TLI(tli) {}
+ : LoopPass(ID), TLI(tli) {
+ initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
+ }
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
// many analyses if they are around.
AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved("domfrontier");
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
@@ -3815,6 +3824,9 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
+ // Requiring LoopSimplify a second time here prevents IVUsers from running
+ // twice, since LoopSimplify was invalidated by running ScalarEvolution.
+ AU.addRequiredID(LoopSimplifyID);
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index d0edfa2..80b263a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -16,7 +16,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -27,7 +27,7 @@
using namespace llvm;
static cl::opt<unsigned>
-UnrollThreshold("unroll-threshold", cl::init(200), cl::Hidden,
+UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
cl::desc("The cut-off point for automatic loop unrolling"));
static cl::opt<unsigned>
@@ -43,12 +43,20 @@ namespace {
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll() : LoopPass(ID) {}
+ LoopUnroll() : LoopPass(ID) {
+ initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+ }
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
static const unsigned NoThreshold = UINT_MAX;
+
+ // Threshold to use when optsize is specified (and there is no
+ // explicit -unroll-threshold).
+ static const unsigned OptSizeUnrollThreshold = 50;
+
+ unsigned CurrentThreshold;
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -73,7 +81,11 @@ namespace {
}
char LoopUnroll::ID = 0;
-INITIALIZE_PASS(LoopUnroll, "loop-unroll", "Unroll loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
@@ -83,8 +95,16 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
Metrics.analyzeBasicBlock(*I);
- NumCalls = Metrics.NumCalls;
- return Metrics.NumInsts;
+ NumCalls = Metrics.NumInlineCandidates;
+
+ unsigned LoopSize = Metrics.NumInsts;
+
+  // Don't allow an estimate of size zero. This would allow unrolling of loops
+  // with huge iteration counts, which is a compile-time problem even if it's
+ // not a problem for code quality.
+ if (LoopSize == 0) LoopSize = 1;
+
+ return LoopSize;
}
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -94,6 +114,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << "\n");
(void)Header;
+
+ // Determine the current unrolling threshold. While this is normally set
+ // from UnrollThreshold, it is overridden to a smaller value if the current
+ // function is marked as optimize-for-size, and the unroll threshold was
+ // not user specified.
+ CurrentThreshold = UnrollThreshold;
+ if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize) &&
+ UnrollThreshold.getNumOccurrences() == 0)
+ CurrentThreshold = OptSizeUnrollThreshold;
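
The threshold choice reduces to a small function (name hypothetical):

// Use the -unroll-threshold value when the user set it; otherwise fall
// back to the tighter cap for optimize-for-size functions.
static unsigned pickUnrollThreshold(unsigned FlagValue, bool FlagWasGiven,
                                    bool OptForSize) {
  const unsigned OptSizeUnrollThreshold = 50;
  return (OptForSize && !FlagWasGiven) ? OptSizeUnrollThreshold : FlagValue;
}
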
// Find trip count
unsigned TripCount = L->getSmallConstantTripCount();
@@ -111,25 +140,25 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Enforce the threshold.
- if (UnrollThreshold != NoThreshold) {
- unsigned NumCalls;
- unsigned LoopSize = ApproximateLoopSize(L, NumCalls);
+ if (CurrentThreshold != NoThreshold) {
+ unsigned NumInlineCandidates;
+ unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
- if (NumCalls != 0) {
- DEBUG(dbgs() << " Not unrolling loop with function calls.\n");
+ if (NumInlineCandidates != 0) {
+ DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return false;
}
uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > UnrollThreshold) {
+ if (TripCount != 1 && Size > CurrentThreshold) {
DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << UnrollThreshold << "\n");
+ << " because size: " << Size << ">" << CurrentThreshold << "\n");
if (!UnrollAllowPartial) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
// Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = UnrollThreshold / LoopSize;
+ Count = CurrentThreshold / LoopSize;
while (Count != 0 && TripCount%Count != 0) {
Count--;
}
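
A worked sketch of the partial-unroll count above: start at Threshold / LoopSize and decrement until the count divides the trip count (function name hypothetical):

// E.g. Threshold = 150, LoopSize = 20, TripCount = 12: Count starts at 7,
// 12 % 7 != 0, so it drops to 6, which divides 12 evenly.
static unsigned partialUnrollCount(unsigned Threshold, unsigned LoopSize,
                                   unsigned TripCount) {
  unsigned Count = (LoopSize != 0) ? Threshold / LoopSize : 0;
  while (Count != 0 && TripCount % Count != 0)
    --Count;
  return Count; // 0 means no usable partial count was found
}
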
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 9afe428..b4e3d31 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,12 +32,12 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -93,7 +93,9 @@ namespace {
explicit LoopUnswitch(bool Os = false) :
LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
currentLoop(NULL), DT(NULL), loopHeader(NULL),
- loopPreheader(NULL) {}
+ loopPreheader(NULL) {
+ initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
+ }
bool runOnLoop(Loop *L, LPPassManager &LPM);
bool processCurrentLoop();
@@ -109,6 +111,7 @@ namespace {
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<DominatorTree>();
+ AU.addPreserved<ScalarEvolution>();
}
private:
@@ -158,7 +161,13 @@ namespace {
};
}
char LoopUnswitch::ID = 0;
-INITIALIZE_PASS(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+ false, false)
Pass *llvm::createLoopUnswitchPass(bool Os) {
return new LoopUnswitch(Os);
@@ -450,22 +459,9 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
return true;
}
-// RemapInstruction - Convert the instruction operands from referencing the
-// current values into those specified by VMap.
-//
-static inline void RemapInstruction(Instruction *I,
- ValueMap<const Value *, Value*> &VMap) {
- for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
- Value *Op = I->getOperand(op);
- ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
- if (It != VMap.end()) Op = It->second;
- I->setOperand(op, Op);
- }
-}
-
/// CloneLoop - Recursively clone the specified loop and all of its children,
/// mapping the blocks with the specified map.
-static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap<const Value*, Value*> &VM,
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
LoopInfo *LI, LPPassManager *LPM) {
Loop *New = new Loop();
LPM->insertLoop(New, PL);
@@ -580,6 +576,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
<< " blocks] in Function " << F->getName()
<< " when '" << *Val << "' == " << *LIC << "\n");
+ if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
LoopBlocks.clear();
NewBlocks.clear();
@@ -609,7 +608,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// the loop preheader and exit blocks), keeping track of the mapping between
// the instructions and blocks.
NewBlocks.reserve(LoopBlocks.size());
- ValueMap<const Value*, Value*> VMap;
+ ValueToValueMapTy VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
NewBlocks.push_back(NewBB);
@@ -647,7 +646,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
PN = cast<PHINode>(I);
Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
- ValueMap<const Value *, Value*>::iterator It = VMap.find(V);
+ ValueToValueMapTy::iterator It = VMap.find(V);
if (It != VMap.end()) V = It->second;
PN->addIncoming(V, NewExit);
}
@@ -657,7 +656,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- RemapInstruction(I, VMap);
+ RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
@@ -961,13 +960,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
while (!Worklist.empty()) {
Instruction *I = Worklist.back();
Worklist.pop_back();
-
- // Simple constant folding.
- if (Constant *C = ConstantFoldInstruction(I)) {
- ReplaceUsesOfWith(I, C, Worklist, L, LPM);
- continue;
- }
-
+
// Simple DCE.
if (isInstructionTriviallyDead(I)) {
DEBUG(dbgs() << "Remove dead instruction '" << *I);
@@ -982,15 +975,16 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
++NumSimplify;
continue;
}
-
+
// See if instruction simplification can hack this up. This is common for
// things like "select false, X, Y" after unswitching made the condition be
// 'false'.
- if (Value *V = SimplifyInstruction(I)) {
- ReplaceUsesOfWith(I, V, Worklist, L, LPM);
- continue;
- }
-
+ if (Value *V = SimplifyInstruction(I, 0, DT))
+ if (LI->replacementPreservesLCSSAForm(I, V)) {
+ ReplaceUsesOfWith(I, V, Worklist, L, LPM);
+ continue;
+ }
+
// Special case hacks that appear commonly in unswitched code.
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
if (BI->isUnconditional()) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
index 973ffe7..9087b46 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -14,26 +14,15 @@
#define DEBUG_TYPE "loweratomic"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
#include "llvm/Function.h"
-#include "llvm/Instruction.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/IRBuilder.h"
-
using namespace llvm;
-namespace {
-
-bool LowerAtomicIntrinsic(CallInst *CI) {
- IRBuilder<> Builder(CI->getParent(), CI);
-
- Function *Callee = CI->getCalledFunction();
- if (!Callee)
- return false;
-
- unsigned IID = Callee->getIntrinsicID();
+static bool LowerAtomicIntrinsic(IntrinsicInst *II) {
+ IRBuilder<> Builder(II->getParent(), II);
+ unsigned IID = II->getIntrinsicID();
switch (IID) {
case Intrinsic::memory_barrier:
break;
@@ -48,80 +37,70 @@ bool LowerAtomicIntrinsic(CallInst *CI) {
case Intrinsic::atomic_load_min:
case Intrinsic::atomic_load_umax:
case Intrinsic::atomic_load_umin: {
- Value *Ptr = CI->getArgOperand(0);
- Value *Delta = CI->getArgOperand(1);
+ Value *Ptr = II->getArgOperand(0), *Delta = II->getArgOperand(1);
LoadInst *Orig = Builder.CreateLoad(Ptr);
Value *Res = NULL;
switch (IID) {
- default: assert(0 && "Unrecognized atomic modify operation");
- case Intrinsic::atomic_load_add:
- Res = Builder.CreateAdd(Orig, Delta);
- break;
- case Intrinsic::atomic_load_sub:
- Res = Builder.CreateSub(Orig, Delta);
- break;
- case Intrinsic::atomic_load_and:
- Res = Builder.CreateAnd(Orig, Delta);
- break;
- case Intrinsic::atomic_load_nand:
- Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
- break;
- case Intrinsic::atomic_load_or:
- Res = Builder.CreateOr(Orig, Delta);
- break;
- case Intrinsic::atomic_load_xor:
- Res = Builder.CreateXor(Orig, Delta);
- break;
- case Intrinsic::atomic_load_max:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
- Delta,
- Orig);
- break;
- case Intrinsic::atomic_load_min:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
- Orig,
- Delta);
- break;
- case Intrinsic::atomic_load_umax:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
- Delta,
- Orig);
- break;
- case Intrinsic::atomic_load_umin:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
- Orig,
- Delta);
- break;
+ default: assert(0 && "Unrecognized atomic modify operation");
+ case Intrinsic::atomic_load_add:
+ Res = Builder.CreateAdd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_sub:
+ Res = Builder.CreateSub(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_and:
+ Res = Builder.CreateAnd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_nand:
+ Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
+ break;
+ case Intrinsic::atomic_load_or:
+ Res = Builder.CreateOr(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_xor:
+ Res = Builder.CreateXor(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_max:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Delta, Orig);
+ break;
+ case Intrinsic::atomic_load_min:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_umax:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Delta, Orig);
+ break;
+ case Intrinsic::atomic_load_umin:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Orig, Delta);
+ break;
}
Builder.CreateStore(Res, Ptr);
- CI->replaceAllUsesWith(Orig);
+ II->replaceAllUsesWith(Orig);
break;
}
case Intrinsic::atomic_swap: {
- Value *Ptr = CI->getArgOperand(0);
- Value *Val = CI->getArgOperand(1);
-
+ Value *Ptr = II->getArgOperand(0), *Val = II->getArgOperand(1);
LoadInst *Orig = Builder.CreateLoad(Ptr);
Builder.CreateStore(Val, Ptr);
-
- CI->replaceAllUsesWith(Orig);
+ II->replaceAllUsesWith(Orig);
break;
}
case Intrinsic::atomic_cmp_swap: {
- Value *Ptr = CI->getArgOperand(0);
- Value *Cmp = CI->getArgOperand(1);
- Value *Val = CI->getArgOperand(2);
+ Value *Ptr = II->getArgOperand(0), *Cmp = II->getArgOperand(1);
+ Value *Val = II->getArgOperand(2);
LoadInst *Orig = Builder.CreateLoad(Ptr);
Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
Value *Res = Builder.CreateSelect(Equal, Val, Orig);
Builder.CreateStore(Res, Ptr);
-
- CI->replaceAllUsesWith(Orig);
+ II->replaceAllUsesWith(Orig);
break;
}
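
Ignoring atomicity (which is exactly what this pass does), the cmp-swap lowering computes the following; a scalar sketch:

// Load the old value, select the replacement only when the comparison
// matches, store unconditionally, and hand back the old value -- the
// same load/icmp/select/store sequence the IRBuilder code above emits.
static int loweredCmpSwap(int *Ptr, int Cmp, int Val) {
  int Orig = *Ptr;
  int Res = (Orig == Cmp) ? Val : Orig;
  *Ptr = Res;
  return Orig;
}
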
@@ -129,33 +108,32 @@ bool LowerAtomicIntrinsic(CallInst *CI) {
return false;
}
- assert(CI->use_empty() &&
+ assert(II->use_empty() &&
"Lowering should have eliminated any uses of the intrinsic call!");
- CI->eraseFromParent();
+ II->eraseFromParent();
return true;
}
-struct LowerAtomic : public BasicBlockPass {
- static char ID;
- LowerAtomic() : BasicBlockPass(ID) {}
- bool runOnBasicBlock(BasicBlock &BB) {
- bool Changed = false;
- for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
- Instruction *Inst = DI++;
- if (CallInst *CI = dyn_cast<CallInst>(Inst))
- Changed |= LowerAtomicIntrinsic(CI);
+namespace {
+ struct LowerAtomic : public BasicBlockPass {
+ static char ID;
+ LowerAtomic() : BasicBlockPass(ID) {
+ initializeLowerAtomicPass(*PassRegistry::getPassRegistry());
}
- return Changed;
- }
-
-};
-
+ bool runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; )
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DI++))
+ Changed |= LowerAtomicIntrinsic(II);
+ return Changed;
+ }
+ };
}
char LowerAtomic::ID = 0;
INITIALIZE_PASS(LowerAtomic, "loweratomic",
"Lower atomic intrinsics to non-atomic form",
- false, false);
+ false, false)
Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); }
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 24fae42..bde0e53 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -14,16 +14,18 @@
#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include <list>
@@ -32,62 +34,10 @@ using namespace llvm;
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
-
-/// isBytewiseValue - If the specified value can be set by repeating the same
-/// byte in memory, return the i8 value that it is represented with. This is
-/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
-/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
-/// byte store (e.g. i16 0x1234), return null.
-static Value *isBytewiseValue(Value *V) {
- LLVMContext &Context = V->getContext();
-
- // All byte-wide stores are splatable, even of arbitrary variables.
- if (V->getType()->isIntegerTy(8)) return V;
-
- // Constant float and double values can be handled as integer values if the
- // corresponding integer value is "byteable". An important case is 0.0.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
- if (CFP->getType()->isFloatTy())
- V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(Context));
- if (CFP->getType()->isDoubleTy())
- V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(Context));
- // Don't handle long double formats, which have strange constraints.
- }
-
- // We can handle constant integers that are power of two in size and a
- // multiple of 8 bits.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- unsigned Width = CI->getBitWidth();
- if (isPowerOf2_32(Width) && Width > 8) {
- // We can handle this value if the recursive binary decomposition is the
- // same at all levels.
- APInt Val = CI->getValue();
- APInt Val2;
- while (Val.getBitWidth() != 8) {
- unsigned NextWidth = Val.getBitWidth()/2;
- Val2 = Val.lshr(NextWidth);
- Val2.trunc(Val.getBitWidth()/2);
- Val.trunc(Val.getBitWidth()/2);
-
- // If the top/bottom halves aren't the same, reject it.
- if (Val != Val2)
- return 0;
- }
- return ConstantInt::get(Context, Val);
- }
- }
-
- // Conceptually, we could handle things like:
- // %a = zext i8 %X to i16
- // %b = shl i16 %a, 8
- // %c = or i16 %a, %b
- // but until there is an example that actually needs this, it doesn't seem
- // worth worrying about.
- return 0;
-}
+STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
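
The halving check the removed helper performed (the logic now comes from ValueTracking) can be sketched standalone; assumes a power-of-two width of at most 64 bits:

#include <cstdint>

// True if every byte of the Width-bit value V is identical: compare the
// top and bottom halves, then recurse on one half, until a single byte
// remains. isSplatByte(0xF0F0, 16) is true; isSplatByte(0x1234, 16) is not.
static bool isSplatByte(uint64_t V, unsigned Width) {
  if (Width < 8 || Width > 64 || (Width & (Width - 1)) != 0)
    return false;
  while (Width != 8) {
    unsigned Half = Width / 2;
    uint64_t Lo = V & ((1ULL << Half) - 1);
    uint64_t Hi = V >> Half;
    if (Lo != Hi)
      return false;
    V = Lo;
    Width = Half;
  }
  return true;
}
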
static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
- bool &VariableIdxFound, TargetData &TD) {
+ bool &VariableIdxFound, const TargetData &TD){
// Skip over the first indices.
gep_type_iterator GTI = gep_type_begin(GEP);
for (unsigned i = 1; i != Idx; ++i, ++GTI)
@@ -120,14 +70,31 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
/// constant offset, and return that constant offset. For example, Ptr1 might
/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
- TargetData &TD) {
+ const TargetData &TD) {
+ Ptr1 = Ptr1->stripPointerCasts();
+ Ptr2 = Ptr2->stripPointerCasts();
+ GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
+ GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
+
+ bool VariableIdxFound = false;
+
+ // If one pointer is a GEP and the other isn't, then see if the GEP is a
+ // constant offset from the base, as in "P" and "gep P, 1".
+ if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
+ Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
+ return !VariableIdxFound;
+ }
+
+ if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
+ Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
+ return !VariableIdxFound;
+ }
+
// Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
// base. After that base, they may have some number of common (and
// potentially variable) indices. After that they handle some constant
// offset, which determines their offset from each other. At this point, we
// handle no other case.
- GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
- GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
return false;
@@ -137,7 +104,6 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
break;
- bool VariableIdxFound = false;
int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
if (VariableIdxFound) return false;
@@ -171,7 +137,7 @@ struct MemsetRange {
unsigned Alignment;
/// TheStores - The actual stores that make up this range.
- SmallVector<StoreInst*, 16> TheStores;
+ SmallVector<Instruction*, 16> TheStores;
bool isProfitableToUseMemset(const TargetData &TD) const;
@@ -181,10 +147,19 @@ struct MemsetRange {
bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
// If we found more than 8 stores to merge or 64 bytes, use memset.
if (TheStores.size() >= 8 || End-Start >= 64) return true;
+
+ // If there is nothing to merge, don't do anything.
+ if (TheStores.size() < 2) return false;
+
+ // If any of the stores are a memset, then it is always good to extend the
+ // memset.
+ for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
+ if (!isa<StoreInst>(TheStores[i]))
+ return true;
// Assume that the code generator is capable of merging pairs of stores
// together if it wants to.
- if (TheStores.size() <= 2) return false;
+ if (TheStores.size() == 2) return false;
// If we have fewer than 8 stores, it can still be worthwhile to do this.
// For example, merging 4 i8 stores into an i32 store is useful almost always.
@@ -215,31 +190,53 @@ class MemsetRanges {
/// because each element is relatively large and expensive to copy.
std::list<MemsetRange> Ranges;
typedef std::list<MemsetRange>::iterator range_iterator;
- TargetData &TD;
+ const TargetData &TD;
public:
- MemsetRanges(TargetData &td) : TD(td) {}
+ MemsetRanges(const TargetData &td) : TD(td) {}
typedef std::list<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
const_iterator end() const { return Ranges.end(); }
bool empty() const { return Ranges.empty(); }
- void addStore(int64_t OffsetFromFirst, StoreInst *SI);
+ void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ addStore(OffsetFromFirst, SI);
+ else
+ addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
+ }
+
+ void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
+ int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
+
+ addRange(OffsetFromFirst, StoreSize,
+ SI->getPointerOperand(), SI->getAlignment(), SI);
+ }
+
+ void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
+ int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
+ }
+
+ void addRange(int64_t Start, int64_t Size, Value *Ptr,
+ unsigned Alignment, Instruction *Inst);
+
};
} // end anon namespace
-/// addStore - Add a new store to the MemsetRanges data structure. This adds a
+/// addRange - Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
-void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
- int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
-
- // Do a linear search of the ranges to see if this can be joined and/or to
- // find the insertion point in the list. We keep the ranges sorted for
- // simplicity here. This is a linear search of a linked list, which is ugly,
- // however the number of ranges is limited, so this won't get crazy slow.
+///
+/// Do a linear search of the ranges to see if this can be joined and/or to
+/// find the insertion point in the list. We keep the ranges sorted for
+/// simplicity here. This is a linear search of a linked list, which is ugly,
+/// however the number of ranges is limited, so this won't get crazy slow.
+void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
+ unsigned Alignment, Instruction *Inst) {
+ int64_t End = Start+Size;
range_iterator I = Ranges.begin(), E = Ranges.end();
while (I != E && Start > I->End)
@@ -252,14 +249,14 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
MemsetRange &R = *Ranges.insert(I, MemsetRange());
R.Start = Start;
R.End = End;
- R.StartPtr = SI->getPointerOperand();
- R.Alignment = SI->getAlignment();
- R.TheStores.push_back(SI);
+ R.StartPtr = Ptr;
+ R.Alignment = Alignment;
+ R.TheStores.push_back(Inst);
return;
}
-
+
// This store overlaps with I, add it.
- I->TheStores.push_back(SI);
+ I->TheStores.push_back(Inst);
// At this point, we may have an interval that completely contains our store.
// If so, just add it to the interval and return.
@@ -274,8 +271,8 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
// stopped on *it*.
if (Start < I->Start) {
I->Start = Start;
- I->StartPtr = SI->getPointerOperand();
- I->Alignment = SI->getAlignment();
+ I->StartPtr = Ptr;
+ I->Alignment = Alignment;
}
// Now we know that Start <= I->End and Start >= I->Start (so the startpoint
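
The overall shape of addRange, as a standalone sketch over a sorted std::list (plain C++, illustrative only; the real pass also updates StartPtr, Alignment, and TheStores as it merges):

    #include <algorithm>
    #include <cstdint>
    #include <iterator>
    #include <list>

    struct Range { int64_t Start, End; };

    static void addRange(std::list<Range> &Ranges, int64_t Start, int64_t End) {
      auto I = Ranges.begin(), E = Ranges.end();
      while (I != E && Start > I->End)
        ++I;                                  // linear scan; list stays sorted

      if (I == E || End < I->Start) {         // no overlap: insert a new range
        Ranges.insert(I, Range{Start, End});
        return;
      }

      I->Start = std::min(I->Start, Start);   // grow the range we landed on
      I->End = std::max(I->End, End);

      // Growing End may make I overlap its successors; fold those in too.
      for (auto N = std::next(I); N != Ranges.end() && I->End >= N->Start;
           N = Ranges.erase(N))
        I->End = std::max(I->End, N->End);
    }
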
@@ -301,10 +298,16 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
namespace {
class MemCpyOpt : public FunctionPass {
- bool runOnFunction(Function &F);
+ MemoryDependenceAnalysis *MD;
+ const TargetData *TD;
public:
static char ID; // Pass identification, replacement for typeid
- MemCpyOpt() : FunctionPass(ID) {}
+ MemCpyOpt() : FunctionPass(ID) {
+ initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
+ MD = 0;
+ }
+
+ bool runOnFunction(Function &F);
private:
// This transformation requires dominator postdominator info
@@ -319,9 +322,17 @@ namespace {
    // Helper functions
bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
+ bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
bool processMemCpy(MemCpyInst *M);
bool processMemMove(MemMoveInst *M);
- bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
+ bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
+ uint64_t cpyLen, CallInst *C);
+ bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+ uint64_t MSize);
+ bool processByValArgument(CallSite CS, unsigned ArgNo);
+ Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
+ Value *ByteVal);
+
bool iterateOnFunction(Function &F);
};
@@ -331,165 +342,199 @@ namespace {
// createMemCpyOptPass - The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
-INITIALIZE_PASS(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false);
-
-
+INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+ false, false)
-/// processStore - When GVN is scanning forward over instructions, we look for
+/// tryMergingIntoMemset - When scanning forward over instructions, we look for
/// some other patterns to fold away. In particular, this looks for stores to
-/// neighboring locations of memory. If it sees enough consequtive ones
-/// (currently 4) it attempts to merge them together into a memcpy/memset.
-bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
- if (SI->isVolatile()) return false;
-
- LLVMContext &Context = SI->getContext();
-
- // There are two cases that are interesting for this code to handle: memcpy
- // and memset. Right now we only handle memset.
+/// neighboring locations of memory. If it sees enough consecutive ones, it
+/// attempts to merge them together into a memcpy/memset.
+Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
+ Value *StartPtr, Value *ByteVal) {
+ if (TD == 0) return 0;
- // Ensure that the value being stored is something that can be memset'able a
- // byte at a time like "0" or "-1" or any width, as well as things like
- // 0xA0A0A0A0 and 0.0.
- Value *ByteVal = isBytewiseValue(SI->getOperand(0));
- if (!ByteVal)
- return false;
-
- TargetData *TD = getAnalysisIfAvailable<TargetData>();
- if (!TD) return false;
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- Module *M = SI->getParent()->getParent()->getParent();
-
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
// are stored.
MemsetRanges Ranges(*TD);
- Value *StartPtr = SI->getPointerOperand();
-
- BasicBlock::iterator BI = SI;
+ BasicBlock::iterator BI = StartInst;
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
- if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
- // If the call is readnone, ignore it, otherwise bail out. We don't even
- // allow readonly here because we don't want something like:
+ if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
+ // If the instruction is readnone, ignore it, otherwise bail out. We
+ // don't even allow readonly here because we don't want something like:
// A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
- if (AA.getModRefBehavior(CallSite(BI)) ==
- AliasAnalysis::DoesNotAccessMemory)
- continue;
-
- // TODO: If this is a memset, try to join it in.
-
- break;
- } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
- break;
-
- // If this is a non-store instruction it is fine, ignore it.
- StoreInst *NextStore = dyn_cast<StoreInst>(BI);
- if (NextStore == 0) continue;
+ if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
+ break;
+ continue;
+ }
- // If this is a store, see if we can merge it in.
- if (NextStore->isVolatile()) break;
+ if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
+ // If this is a store, see if we can merge it in.
+ if (NextStore->isVolatile()) break;
- // Check to see if this stored value is of the same byte-splattable value.
- if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
- break;
-
- // Check to see if this store is to a constant offset from the start ptr.
- int64_t Offset;
- if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
- break;
-
- Ranges.addStore(Offset, NextStore);
+ // Check to see if this stored value is of the same byte-splattable value.
+ if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ int64_t Offset;
+ if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
+ Offset, *TD))
+ break;
+
+ Ranges.addStore(Offset, NextStore);
+ } else {
+ MemSetInst *MSI = cast<MemSetInst>(BI);
+
+ if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
+ !isa<ConstantInt>(MSI->getLength()))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ int64_t Offset;
+ if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
+ break;
+
+ Ranges.addMemSet(Offset, MSI);
+ }
}
-
+
// If we have no ranges, then we just had a single store with nothing that
// could be merged in. This is a very common case of course.
if (Ranges.empty())
- return false;
+ return 0;
// If we had at least one store that could be merged in, add the starting
// store as well. We try to avoid this unless there is at least something
// interesting as a small compile-time optimization.
- Ranges.addStore(0, SI);
-
-
+ Ranges.addInst(0, StartInst);
+
+  // If we create any memsets, we put them right before the first instruction
+  // that isn't part of the memset block. This ensures that the memset is
+  // dominated by any addressing instruction needed by the start of the block.
+ IRBuilder<> Builder(BI);
+
// Now that we have full information about ranges, loop over the ranges and
// emit memset's for anything big enough to be worthwhile.
- bool MadeChange = false;
+ Instruction *AMemSet = 0;
for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
I != E; ++I) {
const MemsetRange &Range = *I;
-
+
if (Range.TheStores.size() == 1) continue;
// If it is profitable to lower this range to memset, do so now.
if (!Range.isProfitableToUseMemset(*TD))
continue;
- // Otherwise, we do want to transform this! Create a new memset. We put
- // the memset right before the first instruction that isn't part of this
- // memset block. This ensure that the memset is dominated by any addressing
- // instruction needed by the start of the block.
- BasicBlock::iterator InsertPt = BI;
-
+ // Otherwise, we do want to transform this! Create a new memset.
// Get the starting pointer of the block.
StartPtr = Range.StartPtr;
-
+
// Determine alignment
unsigned Alignment = Range.Alignment;
if (Alignment == 0) {
const Type *EltType =
- cast<PointerType>(StartPtr->getType())->getElementType();
+ cast<PointerType>(StartPtr->getType())->getElementType();
Alignment = TD->getABITypeAlignment(EltType);
}
-
- // Cast the start ptr to be i8* as memset requires.
- const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
- const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
- StartPTy->getAddressSpace());
- if (StartPTy!= i8Ptr)
- StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
- InsertPt);
-
- Value *Ops[] = {
- StartPtr, ByteVal, // Start, value
- // size
- ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
- // align
- ConstantInt::get(Type::getInt32Ty(Context), Alignment),
- // volatile
- ConstantInt::get(Type::getInt1Ty(Context), 0),
- };
- const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
-
- Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
-
- Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
+
+ AMemSet =
+ Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+
DEBUG(dbgs() << "Replace stores:\n";
for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
- dbgs() << *Range.TheStores[i];
- dbgs() << "With: " << *C); C=C;
-
- // Don't invalidate the iterator
- BBI = BI;
-
+ dbgs() << *Range.TheStores[i] << '\n';
+ dbgs() << "With: " << *AMemSet << '\n');
+
// Zap all the stores.
- for (SmallVector<StoreInst*, 16>::const_iterator
+ for (SmallVector<Instruction*, 16>::const_iterator
SI = Range.TheStores.begin(),
- SE = Range.TheStores.end(); SI != SE; ++SI)
+ SE = Range.TheStores.end(); SI != SE; ++SI) {
+ MD->removeInstruction(*SI);
(*SI)->eraseFromParent();
+ }
++NumMemSetInfer;
- MadeChange = true;
}
- return MadeChange;
+ return AMemSet;
+}
+
+
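+
+The source-level effect of the merge, as a before/after sketch (plain C++; the pass performs this on LLVM IR, not on C source):
+
    #include <cstring>

    // Before: eight adjacent stores of the same byte-splattable value.
    void beforeMerge(unsigned char *p) {
      p[0] = 0; p[1] = 0; p[2] = 0; p[3] = 0;
      p[4] = 0; p[5] = 0; p[6] = 0; p[7] = 0;
    }

    // After tryMergingIntoMemset: one memset covering the contiguous range.
    void afterMerge(unsigned char *p) {
      std::memset(p, 0, 8);
    }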
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
+ if (SI->isVolatile()) return false;
+
+ if (TD == 0) return false;
+
+ // Detect cases where we're performing call slot forwarding, but
+ // happen to be using a load-store pair to implement it, rather than
+ // a memcpy.
+ if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
+ if (!LI->isVolatile() && LI->hasOneUse()) {
+ MemDepResult dep = MD->getDependency(LI);
+ CallInst *C = 0;
+ if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
+ C = dyn_cast<CallInst>(dep.getInst());
+
+ if (C) {
+ bool changed = performCallSlotOptzn(LI,
+ SI->getPointerOperand()->stripPointerCasts(),
+ LI->getPointerOperand()->stripPointerCasts(),
+ TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
+ if (changed) {
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
+ MD->removeInstruction(LI);
+ LI->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
+ }
+ }
+
+ // There are two cases that are interesting for this code to handle: memcpy
+ // and memset. Right now we only handle memset.
+
+  // Ensure that the value being stored is something that is memset'able a
+  // byte at a time, like "0" or "-1" of any width, as well as things like
+  // 0xA0A0A0A0 and 0.0.
+ if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
+ if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
+ ByteVal)) {
+ BBI = I; // Don't invalidate iterator.
+ return true;
+ }
+
+ return false;
+}
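
The byte-splattable test that comment refers to, as a standalone sketch for a 32-bit value (plain C++; isBytewiseValue is the real LLVM helper and also handles wider integers and floats):

    #include <cstdint>
    #include <optional>

    static std::optional<uint8_t> bytewiseValue(uint32_t V) {
      uint8_t B = V & 0xFF;
      uint32_t Splat = B * 0x01010101u;  // replicate the low byte four times
      if (V == Splat)
        return B;                        // 0x00000000, 0xFFFFFFFF, 0xA0A0A0A0...
      return std::nullopt;               // not memset'able a byte at a time
    }
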
+
+bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
+ // See if there is another memset or store neighboring this memset which
+ // allows us to widen out the memset to do a single larger store.
+ if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
+ if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
+ MSI->getValue())) {
+ BBI = I; // Don't invalidate iterator.
+ return true;
+ }
+ return false;
}
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
-bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
+bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
+ Value *cpyDest, Value *cpySrc,
+ uint64_t cpyLen, CallInst *C) {
// The general transformation to keep in mind is
//
// call @func(..., src, ...)
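
The hunk cuts the comment short; the transformation it sets up is, roughly, rewriting the call to write into the memcpy destination directly. A source-level analogue (plain C++; fill is a hypothetical callee):

    #include <cstring>

    struct Big { char bytes[256]; };
    void fill(Big *out);                      // hypothetical callee

    void beforeOptzn(Big *dest) {
      Big tmp;
      fill(&tmp);                             // callee writes a temporary,
      std::memcpy(dest, &tmp, sizeof(Big));   // which we then copy to dest
    }

    void afterOptzn(Big *dest) {
      fill(dest);                             // write the destination directly
    }
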
@@ -506,24 +551,15 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// Deliberately get the source and destination with bitcasts stripped away,
// because we'll need to do type comparisons based on the underlying type.
- Value *cpyDest = cpy->getDest();
- Value *cpySrc = cpy->getSource();
CallSite CS(C);
- // We need to be able to reason about the size of the memcpy, so we require
- // that it be a constant.
- ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
- if (!cpyLength)
- return false;
-
// Require that src be an alloca. This simplifies the reasoning considerably.
AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
if (!srcAlloca)
return false;
// Check that all of src is copied to dest.
- TargetData *TD = getAnalysisIfAvailable<TargetData>();
- if (!TD) return false;
+ if (TD == 0) return false;
ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
if (!srcArraySize)
@@ -532,7 +568,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
- if (cpyLength->getZExtValue() < srcSize)
+ if (cpyLen < srcSize)
return false;
// Check that accessing the first srcSize bytes of dest will not cause a
@@ -601,8 +637,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
- AliasAnalysis::NoModRef)
+ if (AA.getModRefInfo(C, cpyDest, srcSize) != AliasAnalysis::NoModRef)
return false;
// All the checks have passed, so do the transformation.
@@ -625,99 +660,142 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// Drop any cached information about the call, because we may have changed
// its dependence information by changing its parameter.
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
- MD.removeInstruction(C);
+ MD->removeInstruction(C);
- // Remove the memcpy
- MD.removeInstruction(cpy);
- cpy->eraseFromParent();
+ // Remove the memcpy.
+ MD->removeInstruction(cpy);
++NumMemCpyInstr;
return true;
}
-/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
-/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
-/// B to be a memcpy from X to Z (or potentially a memmove, depending on
-/// circumstances). This allows later passes to remove the first memcpy
-/// altogether.
-bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
-
- // The are two possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE.
- // b) call-memcpy xform for return slot optimization.
- MemDepResult dep = MD.getDependency(M);
- if (!dep.isClobber())
- return false;
- if (!isa<MemCpyInst>(dep.getInst())) {
- if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
- return performCallSlotOptzn(M, C);
+/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
+/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
+/// copy from MDep's input if we can. MSize is the size of M's copy.
+///
+bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+ uint64_t MSize) {
+  // We can only transform memcpy's where the dest of one is the source of the
+ // other.
+ if (M->getSource() != MDep->getDest() || MDep->isVolatile())
return false;
- }
-
- MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst());
- // We can only transforms memcpy's where the dest of one is the source of the
- // other
- if (M->getSource() != MDep->getDest())
+  // If the dep instruction is reading from our current input, then it is a noop
+ // transfer and substituting the input won't change this instruction. Just
+ // ignore the input and let someone else zap MDep. This handles cases like:
+ // memcpy(a <- a)
+ // memcpy(b <- a)
+ if (M->getSource() == MDep->getSource())
return false;
  // Second, the length of the memcpy's must be the same, or the preceding one
// must be larger than the following one.
- ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
- ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength());
- if (!C1 || !C2)
- return false;
-
- uint64_t DepSize = C1->getValue().getZExtValue();
- uint64_t CpySize = C2->getValue().getZExtValue();
-
- if (DepSize < CpySize)
+ ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+ ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
+ if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
return false;
- // Finally, we have to make sure that the dest of the second does not
- // alias the source of the first
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
- AliasAnalysis::NoAlias)
+
+ // Verify that the copied-from memory doesn't change in between the two
+ // transfers. For example, in:
+ // memcpy(a <- b)
+ // *b = 42;
+ // memcpy(c <- a)
+ // It would be invalid to transform the second memcpy into memcpy(c <- b).
+ //
+ // TODO: If the code between M and MDep is transparent to the destination "c",
+ // then we could still perform the xform by moving M up to the first memcpy.
+ //
+ // NOTE: This is conservative, it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(AA.getLocationForSource(MDep),
+ false, M, M->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
- else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
- AliasAnalysis::NoAlias)
+
+ // If the dest of the second might alias the source of the first, then the
+ // source and dest might overlap. We still want to eliminate the intermediate
+ // value, but we have to generate a memmove instead of memcpy.
+ bool UseMemMove = false;
+ if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
+ UseMemMove = true;
+
+ // If all checks passed, then we can transform M.
+
+ // Make sure to use the lesser of the alignment of the source and the dest
+ // since we're changing where we're reading from, but don't want to increase
+ // the alignment past what can be read from or written to.
+ // TODO: Is this worth it if we're creating a less aligned memcpy? For
+ // example we could be moving from movaps -> movq on x86.
+ unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
+
+ IRBuilder<> Builder(M);
+ if (UseMemMove)
+ Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+ Align, M->isVolatile());
+ else
+ Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+ Align, M->isVolatile());
+
+ // Remove the instruction we're replacing.
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+}
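
What processMemCpyMemCpyDependence does, as a source-level before/after sketch (plain C++, illustrative only):

    #include <cstring>

    void beforeXform(char *a, char *b, char *c, std::size_t n) {
      std::memcpy(b, a, n);   // MDep: b <- a
      std::memcpy(c, b, n);   // M:    c <- b reads MDep's dest
    }

    void afterXform(char *a, char *b, char *c, std::size_t n) {
      std::memcpy(b, a, n);   // now dead if b has no other uses; DSE removes it
      std::memcpy(c, a, n);   // M copies straight from MDep's source
    }
    // If c might alias a, the pass emits memmove(c, a, n) for M instead.
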
+
+
+/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
+/// B to be a memcpy from X to Z (or potentially a memmove, depending on
+/// circumstances). This allows later passes to remove the first memcpy
+/// altogether.
+bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
+ // We can only optimize statically-sized memcpy's that are non-volatile.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (CopySize == 0 || M->isVolatile()) return false;
+
+ // If the source and destination of the memcpy are the same, then zap it.
+ if (M->getSource() == M->getDest()) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
return false;
- else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
- != AliasAnalysis::NoAlias)
+ }
+
+ // If copying from a constant, try to turn the memcpy into a memset.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
+ IRBuilder<> Builder(M);
+ Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize,
+ M->getAlignment(), false);
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumCpyToSet;
+ return true;
+ }
+
+  // There are two possible optimizations we can do for memcpy:
+  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
+ // b) call-memcpy xform for return slot optimization.
+ MemDepResult DepInfo = MD->getDependency(M);
+ if (!DepInfo.isClobber())
return false;
- // If all checks passed, then we can transform these memcpy's
- const Type *ArgTys[3] = { M->getRawDest()->getType(),
- MDep->getRawSource()->getType(),
- M->getLength()->getType() };
- Function *MemCpyFun = Intrinsic::getDeclaration(
- M->getParent()->getParent()->getParent(),
- M->getIntrinsicID(), ArgTys, 3);
+ if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()))
+ return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
- Value *Args[5] = {
- M->getRawDest(), MDep->getRawSource(), M->getLength(),
- M->getAlignmentCst(), M->getVolatileCst()
- };
-
- CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
-
-
- // If C and M don't interfere, then this is a valid transformation. If they
- // did, this would mean that the two sources overlap, which would be bad.
- if (MD.getDependency(C) == dep) {
- MD.removeInstruction(M);
- M->eraseFromParent();
- ++NumMemCpyInstr;
- return true;
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), C)) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ return true;
+ }
}
- // Otherwise, there was no point in doing this, so we remove the call we
- // inserted and act like nothing happened.
- MD.removeInstruction(C);
- C->eraseFromParent();
return false;
}
@@ -726,15 +804,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
bool MemCpyOpt::processMemMove(MemMoveInst *M) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- // If the memmove is a constant size, use it for the alias query, this allows
- // us to optimize things like: memmove(P, P+64, 64);
- uint64_t MemMoveSize = ~0ULL;
- if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
- MemMoveSize = Len->getZExtValue();
-
// See if the pointers alias.
- if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) !=
- AliasAnalysis::NoAlias)
+ if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
return false;
DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
@@ -749,33 +820,107 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
// MemDep may have overly conservative information about this instruction, just
// conservatively flush it from the cache.
- getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
+ MD->removeInstruction(M);
++NumMoveToCpy;
return true;
}
+/// processByValArgument - This is called on every byval argument in call sites.
+bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
+ if (TD == 0) return false;
+
+ // Find out what feeds this byval argument.
+ Value *ByValArg = CS.getArgument(ArgNo);
+ const Type *ByValTy =cast<PointerType>(ByValArg->getType())->getElementType();
+ uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
+ MemDepResult DepInfo =
+ MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
+ true, CS.getInstruction(),
+ CS.getInstruction()->getParent());
+ if (!DepInfo.isClobber())
+ return false;
+
+ // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
+ // a memcpy, see if we can byval from the source of the memcpy instead of the
+ // result.
+ MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
+ if (MDep == 0 || MDep->isVolatile() ||
+ ByValArg->stripPointerCasts() != MDep->getDest())
+ return false;
+
+  // The length of the memcpy must be larger than or equal to the byval size.
+ ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+ if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
+ return false;
+
+ // Get the alignment of the byval. If it is greater than the memcpy, then we
+ // can't do the substitution. If the call doesn't specify the alignment, then
+ // it is some target specific value that we can't know.
+ unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
+ if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign)
+ return false;
+
+ // Verify that the copied-from memory doesn't change in between the memcpy and
+ // the byval call.
+ // memcpy(a <- b)
+ // *b = 42;
+ // foo(*a)
+ // It would be invalid to transform the second memcpy into foo(*b).
+ //
+ // NOTE: This is conservative, it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep),
+ false, CS.getInstruction(), MDep->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+
+ Value *TmpCast = MDep->getSource();
+ if (MDep->getSource()->getType() != ByValArg->getType())
+ TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
+ "tmpcast", CS.getInstruction());
+
+ DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
+ << " " << *MDep << "\n"
+ << " " << *CS.getInstruction() << "\n");
+
+ // Otherwise we're good! Update the byval argument.
+ CS.setArgument(ArgNo, TmpCast);
+ ++NumMemCpyInstr;
+ return true;
+}
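
A source-level analogue of the byval forwarding above (plain C++; pass-by-value here plays the role of the byval attribute, and callee is hypothetical):

    #include <cstring>

    struct S { int v[16]; };
    void callee(S byArg);                  // hypothetical; takes S by value

    void beforeForward(const S *src) {
      S tmp;
      std::memcpy(&tmp, src, sizeof(S));   // MDep: tmp <- src
      callee(tmp);                         // byval-style use of the copy
    }

    void afterForward(const S *src) {
      callee(*src);                        // read the memcpy source directly
    }
    // Legal only because nothing may write *src between the copy and the
    // call, which is what the getPointerDependencyFrom query verifies.
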
-// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN.
+/// iterateOnFunction - Executes one iteration of MemCpyOpt.
bool MemCpyOpt::iterateOnFunction(Function &F) {
bool MadeChange = false;
  // Walk all instructions in the function.
for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE;) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
// Avoid invalidating the iterator.
Instruction *I = BI++;
+ bool RepeatInstruction = false;
+
if (StoreInst *SI = dyn_cast<StoreInst>(I))
MadeChange |= processStore(SI, BI);
+ else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
+ RepeatInstruction = processMemSet(M, BI);
else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
- MadeChange |= processMemCpy(M);
- else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
- if (processMemMove(M)) {
- --BI; // Reprocess the new memcpy.
- MadeChange = true;
- }
+ RepeatInstruction = processMemCpy(M);
+ else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
+ RepeatInstruction = processMemMove(M);
+ else if (CallSite CS = (Value*)I) {
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.paramHasAttr(i+1, Attribute::ByVal))
+ MadeChange |= processByValArgument(CS, i);
+ }
+
+ // Reprocess the instruction if desired.
+ if (RepeatInstruction) {
+ if (BI != BB->begin()) --BI;
+ MadeChange = true;
}
}
}
@@ -788,14 +933,14 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
//
bool MemCpyOpt::runOnFunction(Function &F) {
bool MadeChange = false;
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ TD = getAnalysisIfAvailable<TargetData>();
while (1) {
if (!iterateOnFunction(F))
break;
MadeChange = true;
}
+ MD = 0;
return MadeChange;
}
-
-
-
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index b8afcc1..e093b52 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -77,7 +77,9 @@ namespace {
bool MadeChange;
public:
static char ID; // Pass identification, replacement for typeid
- Reassociate() : FunctionPass(ID) {}
+ Reassociate() : FunctionPass(ID) {
+ initializeReassociatePass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
@@ -104,7 +106,7 @@ namespace {
char Reassociate::ID = 0;
INITIALIZE_PASS(Reassociate, "reassociate",
- "Reassociate expressions", false, false);
+ "Reassociate expressions", false, false)
// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
@@ -238,6 +240,12 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
RHS->setOperand(0, LHS);
I->setOperand(0, RHS);
+ // Conservatively clear all the optional flags, which may not hold
+ // after the reassociation.
+ I->clearSubclassOptionalData();
+ LHS->clearSubclassOptionalData();
+ RHS->clearSubclassOptionalData();
+
++NumLinear;
MadeChange = true;
DEBUG(dbgs() << "Linearized: " << *I << '\n');
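
Why clearing the flags is necessary (illustrative; the int8_t arithmetic below stands in for LLVM's i8 with the nsw flag, where the overflow would make the flag a lie):

    #include <cstdint>

    // With a=100, b=28, c=-28, the original association never leaves
    // [-128,127], but the reassociated one computes 100+28 = 128, which
    // overflows i8, so an nsw flag carried over would no longer hold.
    int8_t original(int8_t a, int8_t b, int8_t c) {
      return a + (b + c);     // 28 + (-28) = 0, then 100 + 0 = 100
    }
    int8_t reassociated(int8_t a, int8_t b, int8_t c) {
      return (a + b) + c;     // 100 + 28 = 128: out of range for i8
    }
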
@@ -339,6 +347,12 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
DEBUG(dbgs() << "RA: " << *I << '\n');
I->setOperand(0, Ops[i].Op);
I->setOperand(1, Ops[i+1].Op);
+
+ // Clear all the optional flags, which may not hold after the
+ // reassociation if the expression involved more than just this operation.
+ if (Ops.size() != 2)
+ I->clearSubclassOptionalData();
+
DEBUG(dbgs() << "TO: " << *I << '\n');
MadeChange = true;
++NumChanged;
@@ -354,6 +368,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
if (I->getOperand(1) != Ops[i].Op) {
DEBUG(dbgs() << "RA: " << *I << '\n');
I->setOperand(1, Ops[i].Op);
+
+ // Conservatively clear all the optional flags, which may not hold
+ // after the reassociation.
+ I->clearSubclassOptionalData();
+
DEBUG(dbgs() << "TO: " << *I << '\n');
MadeChange = true;
++NumChanged;
@@ -809,16 +828,23 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// RemoveFactorFromExpression on successive values to behave differently.
Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
SmallVector<Value*, 4> NewMulOps;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ for (unsigned i = 0; i != Ops.size(); ++i) {
// Only try to remove factors from expressions we're allowed to.
BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
continue;
if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
- NewMulOps.push_back(V);
- Ops.erase(Ops.begin()+i);
- --i; --e;
+ // The factorized operand may occur several times. Convert them all in
+ // one fell swoop.
+ for (unsigned j = Ops.size(); j != i;) {
+ --j;
+ if (Ops[j].Op == Ops[i].Op) {
+ NewMulOps.push_back(V);
+ Ops.erase(Ops.begin()+j);
+ }
+ }
+ --i;
}
}
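
The backwards scan in that new loop is the standard way to erase every match from a vector while keeping the index i valid; a standalone sketch of the idiom (plain C++ over std::vector):

    #include <cstddef>
    #include <vector>

    template <typename T>
    static void eraseAllEqual(std::vector<T> &V, std::size_t i) {
      T Val = V[i];   // copy first: erasing below shifts elements around
      for (std::size_t j = V.size(); j != i;) {
        --j;
        if (V[j] == Val)
          V.erase(V.begin() + j);   // indices < j are unaffected
      }
    }
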
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 506b72a..459bb06 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -36,7 +36,9 @@ STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
namespace {
struct RegToMem : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- RegToMem() : FunctionPass(ID) {}
+ RegToMem() : FunctionPass(ID) {
+ initializeRegToMemPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(BreakCriticalEdgesID);
@@ -59,9 +61,11 @@ namespace {
}
char RegToMem::ID = 0;
-INITIALIZE_PASS(RegToMem, "reg2mem", "Demote all values to stack slots",
- false, false);
-
+INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false)
bool RegToMem::runOnFunction(Function &F) {
if (F.isDeclaration())
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index 6115c05..c82e929 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -481,6 +481,19 @@ private:
}
}
+  /// InsertInOverdefinedPHIs - Insert an entry in the UsersOfOverdefinedPHIs
+ /// map for I and PN, but if one is there already, do not create another.
+ /// (Duplicate entries do not break anything directly, but can lead to
+ /// exponential growth of the table in rare cases.)
+ void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) {
+ std::multimap<PHINode*, Instruction*>::iterator J, E;
+ tie(J, E) = UsersOfOverdefinedPHIs.equal_range(PN);
+ for (; J != E; ++J)
+ if (J->second == I)
+ return;
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I));
+ }
+
private:
friend class InstVisitor<SCCPSolver>;
@@ -973,9 +986,9 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
if (Result.isConstant()) {
markConstant(IV, &I, Result.getConstant());
// Remember that this instruction is virtually using the PHI node
- // operands.
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
+ // operands.
+ InsertInOverdefinedPHIs(&I, PN1);
+ InsertInOverdefinedPHIs(&I, PN2);
return;
}
@@ -1056,8 +1069,8 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
markConstant(&I, Result.getConstant());
// Remember that this instruction is virtually using the PHI node
// operands.
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
+ InsertInOverdefinedPHIs(&I, PN1);
+ InsertInOverdefinedPHIs(&I, PN2);
return;
}
@@ -1585,22 +1598,20 @@ namespace {
///
struct SCCP : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- SCCP() : FunctionPass(ID) {}
+ SCCP() : FunctionPass(ID) {
+ initializeSCCPPass(*PassRegistry::getPassRegistry());
+ }
// runOnFunction - Run the Sparse Conditional Constant Propagation
// algorithm, and return true if the function was modified.
//
bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- }
};
} // end anonymous namespace
char SCCP::ID = 0;
INITIALIZE_PASS(SCCP, "sccp",
- "Sparse Conditional Constant Propagation", false, false);
+ "Sparse Conditional Constant Propagation", false, false)
// createSCCPPass - This is the public interface to this file.
FunctionPass *llvm::createSCCPPass() {
@@ -1701,7 +1712,9 @@ namespace {
///
struct IPSCCP : public ModulePass {
static char ID;
- IPSCCP() : ModulePass(ID) {}
+ IPSCCP() : ModulePass(ID) {
+ initializeIPSCCPPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M);
};
} // end anonymous namespace
@@ -1709,7 +1722,7 @@ namespace {
char IPSCCP::ID = 0;
INITIALIZE_PASS(IPSCCP, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
- false, false);
+ false, false)
// createIPSCCPPass - This is the public interface to this file.
ModulePass *llvm::createIPSCCPPass() {
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index cb03423..bf9ca6d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -7,12 +7,15 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the C bindings for libLLVMScalarOpts.a, which implements
-// several scalar transformations over the LLVM intermediate representation.
+// This file implements common infrastructure for libLLVMScalarOpts.a, which
+// implements several scalar transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
//
//===----------------------------------------------------------------------===//
#include "llvm-c/Transforms/Scalar.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Target/TargetData.h"
@@ -20,6 +23,50 @@
using namespace llvm;
+/// initializeScalarOpts - Initialize all passes linked into the
+/// ScalarOpts library.
+void llvm::initializeScalarOpts(PassRegistry &Registry) {
+ initializeADCEPass(Registry);
+ initializeBlockPlacementPass(Registry);
+ initializeCodeGenPreparePass(Registry);
+ initializeConstantPropagationPass(Registry);
+ initializeCorrelatedValuePropagationPass(Registry);
+ initializeDCEPass(Registry);
+ initializeDeadInstEliminationPass(Registry);
+ initializeDSEPass(Registry);
+ initializeGEPSplitterPass(Registry);
+ initializeGVNPass(Registry);
+ initializeEarlyCSEPass(Registry);
+ initializeIndVarSimplifyPass(Registry);
+ initializeJumpThreadingPass(Registry);
+ initializeLICMPass(Registry);
+ initializeLoopDeletionPass(Registry);
+ initializeLoopInstSimplifyPass(Registry);
+ initializeLoopRotatePass(Registry);
+ initializeLoopStrengthReducePass(Registry);
+ initializeLoopUnrollPass(Registry);
+ initializeLoopUnswitchPass(Registry);
+ initializeLoopIdiomRecognizePass(Registry);
+ initializeLowerAtomicPass(Registry);
+ initializeMemCpyOptPass(Registry);
+ initializeReassociatePass(Registry);
+ initializeRegToMemPass(Registry);
+ initializeSCCPPass(Registry);
+ initializeIPSCCPPass(Registry);
+ initializeSROA_DTPass(Registry);
+ initializeSROA_SSAUpPass(Registry);
+ initializeCFGSimplifyPassPass(Registry);
+ initializeSimplifyHalfPowrLibCallsPass(Registry);
+ initializeSimplifyLibCallsPass(Registry);
+ initializeSinkingPass(Registry);
+ initializeTailDupPass(Registry);
+ initializeTailCallElimPass(Registry);
+}
+
+void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
+ initializeScalarOpts(*unwrap(R));
+}
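
Typical use from the C bindings, as a short sketch (assumes the usual llvm-c headers of this era; initialize once before building pass managers):

    #include "llvm-c/Core.h"
    #include "llvm-c/Initialization.h"

    static void initScalarOptsOnce(void) {
      LLVMPassRegistryRef R = LLVMGetGlobalPassRegistry();
      LLVMInitializeScalarOpts(R);
    }
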
+
void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createAggressiveDCEPass());
}
@@ -56,10 +103,6 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopDeletionPass());
}
-void LLVMAddLoopIndexSplitPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopIndexSplitPass());
-}
-
void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopRotatePass());
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index fee317d..c3ca852 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -31,28 +31,34 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
+STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals, "Number of allocas copied from constant global");
namespace {
struct SROA : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- explicit SROA(signed T = -1) : FunctionPass(ID) {
+ SROA(int T, bool hasDT, char &ID)
+ : FunctionPass(ID), HasDomTree(hasDT) {
if (T == -1)
SRThreshold = 128;
else
@@ -64,17 +70,10 @@ namespace {
bool performScalarRepl(Function &F);
bool performPromotion(Function &F);
- // getAnalysisUsage - This pass does not require any passes, but we know it
- // will not alter the CFG, so say so.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
- AU.addRequired<DominanceFrontier>();
- AU.setPreservesCFG();
- }
-
private:
+ bool HasDomTree;
TargetData *TD;
-
+
/// DeadInsts - Keep track of instructions we have made dead, so that
/// we can remove them after we are done working.
SmallVector<Value*, 32> DeadInsts;
@@ -83,39 +82,61 @@ namespace {
/// information about the uses. All these fields are initialized to false
/// and set to true when something is learned.
struct AllocaInfo {
+ /// The alloca to promote.
+ AllocaInst *AI;
+
+ /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
+ /// looping and avoid redundant work.
+ SmallPtrSet<PHINode*, 8> CheckedPHIs;
+
/// isUnsafe - This is set to true if the alloca cannot be SROA'd.
bool isUnsafe : 1;
-
+
/// isMemCpySrc - This is true if this aggregate is memcpy'd from.
bool isMemCpySrc : 1;
/// isMemCpyDst - This is true if this aggregate is memcpy'd into.
bool isMemCpyDst : 1;
- AllocaInfo()
- : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {}
+ /// hasSubelementAccess - This is true if a subelement of the alloca is
+ /// ever accessed, or false if the alloca is only accessed with mem
+ /// intrinsics or load/store that only access the entire alloca at once.
+ bool hasSubelementAccess : 1;
+
+ /// hasALoadOrStore - This is true if there are any loads or stores to it.
+ /// The alloca may just be accessed with memcpy, for example, which would
+ /// not set this.
+ bool hasALoadOrStore : 1;
+
+ explicit AllocaInfo(AllocaInst *ai)
+ : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
+ hasSubelementAccess(false), hasALoadOrStore(false) {}
};
-
+
unsigned SRThreshold;
- void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }
+ void MarkUnsafe(AllocaInfo &I, Instruction *User) {
+ I.isUnsafe = true;
+ DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n');
+ }
bool isSafeAllocaToScalarRepl(AllocaInst *AI);
- void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
- AllocaInfo &Info);
- void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset,
- AllocaInfo &Info);
- void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
- const Type *MemOpType, bool isStore, AllocaInfo &Info);
+ void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
+ void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
+ AllocaInfo &Info);
+ void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
+ void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
+ const Type *MemOpType, bool isStore, AllocaInfo &Info,
+ Instruction *TheAccess, bool AllowWholeAccess);
bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
const Type *&IdxTy);
-
- void DoScalarReplacement(AllocaInst *AI,
+
+ void DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList);
void DeleteDeadInstructions();
-
+
void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
@@ -129,18 +150,63 @@ namespace {
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
-
+
static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
};
+
+ // SROA_DT - SROA that uses DominatorTree.
+ struct SROA_DT : public SROA {
+ static char ID;
+ public:
+ SROA_DT(int T = -1) : SROA(T, true, ID) {
+ initializeSROA_DTPass(*PassRegistry::getPassRegistry());
+ }
+
+ // getAnalysisUsage - This pass does not require any passes, but we know it
+ // will not alter the CFG, so say so.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ }
+ };
+
+ // SROA_SSAUp - SROA that uses SSAUpdater.
+ struct SROA_SSAUp : public SROA {
+ static char ID;
+ public:
+ SROA_SSAUp(int T = -1) : SROA(T, false, ID) {
+ initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
+ }
+
+ // getAnalysisUsage - This pass does not require any passes, but we know it
+ // will not alter the CFG, so say so.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+
}
-char SROA::ID = 0;
-INITIALIZE_PASS(SROA, "scalarrepl",
- "Scalar Replacement of Aggregates", false, false);
+char SROA_DT::ID = 0;
+char SROA_SSAUp::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
+ "Scalar Replacement of Aggregates (DT)", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
+ "Scalar Replacement of Aggregates (DT)", false, false)
+
+INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
+ "Scalar Replacement of Aggregates (SSAUp)", false, false)
+INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
+ "Scalar Replacement of Aggregates (SSAUp)", false, false)
// Public interface to the ScalarReplAggregates pass
-FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
- return new SROA(Threshold);
+FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
+ bool UseDomTree) {
+ if (UseDomTree)
+ return new SROA_DT(Threshold);
+ return new SROA_SSAUp(Threshold);
}
@@ -156,16 +222,16 @@ class ConvertToScalarInfo {
/// AllocaSize - The size of the alloca being considered.
unsigned AllocaSize;
const TargetData &TD;
-
+
/// IsNotTrivial - This is set to true if there is some access to the object
/// which means that mem2reg can't promote it.
bool IsNotTrivial;
-
+
/// VectorTy - This tracks the type that we should promote the vector to if
/// it is possible to turn it into a vector. This starts out null, and if it
/// isn't possible to turn into a vector type, it gets set to VoidTy.
const Type *VectorTy;
-
+
/// HadAVector - True if there is at least one vector access to the alloca.
/// We don't want to turn random arrays into vectors and use vector element
/// insert/extract, but if there are element accesses to something that is
@@ -179,14 +245,14 @@ public:
VectorTy = 0;
HadAVector = false;
}
-
+
AllocaInst *TryConvert(AllocaInst *AI);
-
+
private:
bool CanConvertToScalar(Value *V, uint64_t Offset);
void MergeInType(const Type *In, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
-
+
Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
uint64_t Offset, IRBuilder<> &Builder);
Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
@@ -195,26 +261,6 @@ private:
} // end anonymous namespace.
-/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't
-/// allowed to form. We do this to avoid MMX types, which is a complete hack,
-/// but is required until the backend is fixed.
-static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) {
- StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple());
- if (!Triple.startswith("i386") &&
- !Triple.startswith("x86_64"))
- return false;
-
- // Reject all the MMX vector types.
- switch (VTy->getNumElements()) {
- default: return false;
- case 1: return VTy->getElementType()->isIntegerTy(64);
- case 2: return VTy->getElementType()->isIntegerTy(32);
- case 4: return VTy->getElementType()->isIntegerTy(16);
- case 8: return VTy->getElementType()->isIntegerTy(8);
- }
-}
-
-
/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
/// rewrite it to be a new alloca which is mem2reg'able. This returns the new
/// alloca if possible or null if not.
@@ -223,7 +269,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// out.
if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
return 0;
-
+
// If we were able to find a vector type that can handle this with
// insert/extract elements, and if there was at least one use that had
// a vector type, promote this to a vector. We don't want to promote
@@ -231,8 +277,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
const Type *NewTy;
- if (VectorTy && VectorTy->isVectorTy() && HadAVector &&
- !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) {
+ if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
<< *VectorTy << '\n');
NewTy = VectorTy; // Use the vector type.
@@ -263,7 +308,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
// nothing to be done.
if (VectorTy && VectorTy->isVoidTy())
return;
-
+
// If this could be contributing to a vector, analyze it.
// If the In type is a vector that is the same size as the alloca, see if it
@@ -271,7 +316,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
// Remember if we saw a vector type.
HadAVector = true;
-
+
if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
// If we're storing/loading a vector of the right size, allow it as a
// vector. If this the first vector we see, remember the type so that
@@ -290,7 +335,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
// compatible with it.
unsigned EltSize = In->getPrimitiveSizeInBits()/8;
if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
- (VectorTy == 0 ||
+ (VectorTy == 0 ||
cast<VectorType>(VectorTy)->getElementType()
->getPrimitiveSizeInBits()/8 == EltSize)) {
if (VectorTy == 0)
@@ -298,7 +343,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
return;
}
}
-
+
// Otherwise, we have a case that we can't handle with an optimized vector
// form. We can still turn this into a large integer.
VectorTy = Type::getVoidTy(In->getContext());
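
The compatibility test in the preceding paragraph, reduced to a standalone predicate (plain C++, illustrative):

    #include <cstdint>

    // A scalar access at Offset can be folded into a vector of EltSize-byte
    // elements only if it lands on an element boundary and the element size
    // evenly divides the whole alloca.
    static bool fitsVectorElement(uint64_t Offset, uint64_t AllocaSize,
                                  uint64_t EltSize) {
      return EltSize != 0 && Offset % EltSize == 0 && AllocaSize % EltSize == 0;
    }
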
@@ -316,22 +361,28 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
-
+
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// Don't break volatile loads.
if (LI->isVolatile())
return false;
+ // Don't touch MMX operations.
+ if (LI->getType()->isX86_MMXTy())
+ return false;
MergeInType(LI->getType(), Offset);
continue;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Storing the pointer, not into the value?
if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+ // Don't touch MMX operations.
+ if (SI->getOperand(0)->getType()->isX86_MMXTy())
+ return false;
MergeInType(SI->getOperand(0)->getType(), Offset);
continue;
}
-
+
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
IsNotTrivial = true; // Can't be mem2reg'd.
if (!CanConvertToScalar(BCI, Offset))
@@ -343,7 +394,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// If this is a GEP with a variable indices, we can't handle it.
if (!GEP->hasAllConstantIndices())
return false;
-
+
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
@@ -372,15 +423,15 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
return false;
-
+
IsNotTrivial = true; // Can't be mem2reg'd.
continue;
}
-
+
// Otherwise, we cannot handle this!
return false;
}
-
+
return true;
}
@@ -411,9 +462,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
GEP->eraseFromParent();
continue;
}
-
- IRBuilder<> Builder(User->getParent(), User);
-
+
+ IRBuilder<> Builder(User);
+
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// The load is a bit extract from NewAI shifted right by Offset bits.
Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
@@ -423,7 +474,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
LI->eraseFromParent();
continue;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
assert(SI->getOperand(0) != Ptr && "Consistency error!");
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
@@ -431,14 +482,14 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
Builder);
Builder.CreateStore(New, NewAI);
SI->eraseFromParent();
-
+
// If the load we just inserted is now dead, then the inserted store
// overwrote the entire thing.
if (Old->use_empty())
Old->eraseFromParent();
continue;
}
-
+
// If this is a constant sized memset of a constant value (e.g. 0) we can
// transform it into a store of the expanded constant value.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
@@ -446,7 +497,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
if (NumBytes != 0) {
unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
-
+
// Compute the value replicated the right number of times.
APInt APVal(NumBytes*8, Val);
@@ -454,17 +505,17 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (Val)
for (unsigned i = 1; i != NumBytes; ++i)
APVal |= APVal << 8;
-
+
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(
ConstantInt::get(User->getContext(), APVal),
Old, Offset, Builder);
Builder.CreateStore(New, NewAI);
-
+
// If the load we just inserted is now dead, then the memset overwrote
// the entire thing.
if (Old->use_empty())
- Old->eraseFromParent();
+ Old->eraseFromParent();
}
MSI->eraseFromParent();
continue;
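
The APVal loop above builds the splat constant by repeated shift-and-or; the same computation as a standalone sketch (plain C++, for up to 8 bytes):

    #include <cstdint>

    static uint64_t replicateByte(uint8_t Val, unsigned NumBytes) {
      uint64_t V = Val;
      for (unsigned i = 1; i != NumBytes; ++i)
        V |= V << 8;        // each pass widens the splat by one byte
      return V;             // Val=0xAA, NumBytes=4 -> 0xAAAAAAAA
    }
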
@@ -474,29 +525,42 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// can handle it like a load or store of the scalar type.
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
assert(Offset == 0 && "must be store to start of alloca");
-
+
      // If the source and destination both point to the same alloca, then this
      // is a no-op copy-to-self; just delete it. Otherwise, emit a load and store
// as appropriate.
- AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0));
-
- if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) {
+ AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));
+
+ if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
// Dest must be OrigAI, change this to be a load from the original
// pointer (bitcasted), then a store to our new alloca.
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
Value *SrcPtr = MTI->getSource();
- SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());
-
+ const PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
+ const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
+ AIPTy = PointerType::get(AIPTy->getElementType(),
+ SPTy->getAddressSpace());
+ }
+ SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);
+
LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
SrcVal->setAlignment(MTI->getAlignment());
Builder.CreateStore(SrcVal, NewAI);
- } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) {
+ } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
// Src must be OrigAI, change this to be a load from NewAI then a store
// through the original dest pointer (bitcasted).
assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
- Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
+ const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
+ const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
+ AIPTy = PointerType::get(AIPTy->getElementType(),
+ DPTy->getAddressSpace());
+ }
+ Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);
+
StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
NewStore->setAlignment(MTI->getAlignment());
} else {
@@ -506,7 +570,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
MTI->eraseFromParent();
continue;
}
-
+
llvm_unreachable("Unsupported operation!");
}
}
@@ -548,7 +612,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
V = Builder.CreateBitCast(V, ToType, "tmp");
return V;
}
-
+
// If ToType is a first class aggregate, extract out each of the pieces and
// use insertvalue's to form the FCA.
if (const StructType *ST = dyn_cast<StructType>(ToType)) {
@@ -562,7 +626,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
}
return Res;
}
-
+
if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
Value *Res = UndefValue::get(AT);
@@ -598,7 +662,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
ConstantInt::get(FromVal->getType(),
ShAmt), "tmp");
else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
- FromVal = Builder.CreateShl(FromVal,
+ FromVal = Builder.CreateShl(FromVal,
ConstantInt::get(FromVal->getType(),
-ShAmt), "tmp");
@@ -606,11 +670,11 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
if (LIBitWidth < NTy->getBitWidth())
FromVal =
- Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
+ Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
LIBitWidth), "tmp");
else if (LIBitWidth > NTy->getBitWidth())
FromVal =
- Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
+ Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
LIBitWidth), "tmp");
// If the result is an integer, this is a trunc or bitcast.
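
ConvertScalar_ExtractValue's integer path above is just a logical shift
followed by a truncation or extension. The arithmetic, modeled on plain
integers (a sketch assuming the little-endian case, where element i of the
alloca sits at bit offset i*EltSize):

    #include <cstdint>
    #include <cassert>

    // Read WidthBits starting at OffsetBits out of a wide scalar holding the
    // whole alloca: the CreateLShr step, then the CreateTrunc step.
    uint64_t extractBits(uint64_t Wide, unsigned OffsetBits, unsigned WidthBits) {
      assert(WidthBits >= 1 && OffsetBits + WidthBits <= 64);
      Wide >>= OffsetBits;
      if (WidthBits < 64)
        Wide &= (1ULL << WidthBits) - 1;
      return Wide;
    }

    // e.g. an alloca of [2 x i16] kept as the single i32 value 0xBEEF1234:
    // extractBits(0xBEEF1234, 0, 16)  == 0x1234   (element 0)
    // extractBits(0xBEEF1234, 16, 16) == 0xBEEF   (element 1)

ConvertScalar_InsertValue, below, performs the inverse: shift the element into
place and merge it into the wide value.
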
@@ -647,7 +711,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
-
+
// Changing the whole vector with memset or with an access of a different
// vector type?
if (ValSize == VecSize)
@@ -657,28 +721,28 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
// Must be an element insertion.
unsigned Elt = Offset/EltSize;
-
+
if (SV->getType() != VTy->getElementType())
SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
-
- SV = Builder.CreateInsertElement(Old, SV,
+
+ SV = Builder.CreateInsertElement(Old, SV,
ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
"tmp");
return SV;
}
-
+
// If SV is a first-class aggregate value, insert each value recursively.
if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
const StructLayout &Layout = *TD.getStructLayout(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
- Old = ConvertScalar_InsertValue(Elt, Old,
+ Old = ConvertScalar_InsertValue(Elt, Old,
Offset+Layout.getElementOffsetInBits(i),
Builder);
}
return Old;
}
-
+
if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
@@ -778,16 +842,298 @@ bool SROA::runOnFunction(Function &F) {
return Changed;
}
+namespace {
+class AllocaPromoter : public LoadAndStorePromoter {
+ AllocaInst *AI;
+public:
+ AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S)
+ : LoadAndStorePromoter(Insts, S), AI(0) {}
+
+ void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
+ // Remember which alloca we're promoting (for isInstInList).
+ this->AI = AI;
+ LoadAndStorePromoter::run(Insts);
+ AI->eraseFromParent();
+ }
+
+ virtual bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts) const {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getOperand(0) == AI;
+ return cast<StoreInst>(I)->getPointerOperand() == AI;
+ }
+};
+} // end anon namespace
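
AllocaPromoter is driven from performPromotion further down in this patch; a
minimal usage sketch (mirroring that driver, with AI assumed to be an alloca
whose only users are simple loads and stores):

    SSAUpdater SSA;
    SmallVector<Instruction*, 64> Insts;
    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
         UI != E; ++UI)
      Insts.push_back(cast<Instruction>(*UI));   // every load/store of AI
    AllocaPromoter(Insts, SSA).run(AI, Insts);   // rewrites them, erases AI
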
+
+/// isSafeSelectToSpeculate - A select instruction that uses an alloca and
+/// whose result is subsequently loaded can be rewritten to load both input
+/// pointers and then select between the results, allowing the load of the
+/// alloca to be promoted.
+/// From this:
+/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
+/// %V = load i32* %P2
+/// to:
+/// %V1 = load i32* %Alloca -> will be mem2reg'd
+/// %V2 = load i32* %Other
+/// %V = select i1 %cond, i32 %V1, i32 %V2
+///
+/// We can do this to a select if its only uses are loads and if the operands
+/// to the select can be loaded unconditionally.
+static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
+ bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
+ bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
+
+ for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || LI->isVolatile()) return false;
+
+    // Both operands to the select need to be dereferenceable, either
+    // absolutely (e.g. allocas) or because at this point we can see other
+    // accesses to it.
+ if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
+ LI->getAlignment(), TD))
+ return false;
+ if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
+ LI->getAlignment(), TD))
+ return false;
+ }
+
+ return true;
+}
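
At the source level this is the familiar "hoist the loads above the select"
rewrite. A C++ illustration (hypothetical functions; references are used so
both operands are known dereferenceable, which is exactly the legality
condition checked above):

    // Before: the load is guarded by the select, hiding &local from mem2reg.
    int before(bool c, int &local, int &other) {
      int *p = c ? &local : &other;
      return *p;
    }

    // After speculation: both pointers are loaded unconditionally, and the
    // select picks between the loaded values; &local now sees only a direct
    // load and can be promoted.
    int after(bool c, int &local, int &other) {
      int v1 = local;
      int v2 = other;
      return c ? v1 : v2;
    }
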
+
+/// isSafePHIToSpeculate - A PHI instruction that uses an alloca and whose
+/// result is subsequently loaded can be rewritten to load both input pointers
+/// in the pred blocks and then PHI the results, allowing the load of the
+/// alloca to be promoted.
+/// From this:
+/// %P2 = phi [i32* %Alloca, i32* %Other]
+/// %V = load i32* %P2
+/// to:
+/// %V1 = load i32* %Alloca -> will be mem2reg'd
+/// ...
+/// %V2 = load i32* %Other
+/// ...
+/// %V = phi [i32 %V1, i32 %V2]
+///
+/// We can do this to a PHI if its only uses are loads and if the operands to
+/// the PHI can be loaded unconditionally.
+static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
+ // For now, we can only do this promotion if the load is in the same block as
+ // the PHI, and if there are no stores between the phi and load.
+ // TODO: Allow recursive phi users.
+ // TODO: Allow stores.
+ BasicBlock *BB = PN->getParent();
+ unsigned MaxAlign = 0;
+ for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || LI->isVolatile()) return false;
+
+ // For now we only allow loads in the same block as the PHI. This is a
+ // common case that happens when instcombine merges two loads through a PHI.
+ if (LI->getParent() != BB) return false;
+
+ // Ensure that there are no instructions between the PHI and the load that
+ // could store.
+ for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ MaxAlign = std::max(MaxAlign, LI->getAlignment());
+ }
+
+ // Okay, we know that we have one or more loads in the same block as the PHI.
+ // We can transform this if it is safe to push the loads into the predecessor
+  // blocks. The only thing to watch out for is that we can't put a possibly
+  // trapping load in the predecessor if the edge from it is critical.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+
+ // If the predecessor has a single successor, then the edge isn't critical.
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ Value *InVal = PN->getIncomingValue(i);
+
+ // If the InVal is an invoke in the pred, we can't put a load on the edge.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+ if (II->getParent() == Pred)
+ return false;
+
+ // If this pointer is always safe to load, or if we can prove that there is
+ // already a load in the block, then we can move the load to the pred block.
+ if (InVal->isDereferenceablePointer() ||
+ isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
+ continue;
+
+ return false;
+ }
+
+ return true;
+}
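
The PHI case is the same idea across a control-flow merge; an illustrative C++
analogue (hypothetical, with both pointers dereferenceable as the checks above
require):

    // Before: two addresses merge in a PHI, then a single load follows it.
    int before(bool c, int &local, int &other) {
      int *p;
      if (c) p = &local; else p = &other;
      return *p;                // load of the PHI'd pointer
    }

    // After: each predecessor loads its own pointer; the PHI merges values,
    // not pointers, so the alloca behind &local sees only direct loads.
    int after(bool c, int &local, int &other) {
      int v;
      if (c) v = local; else v = other;
      return v;                 // PHI of the loaded values
    }
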
+
+
+/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
+/// direct (non-volatile) loads and stores to it. If the alloca is close but
+/// not quite there, this will transform the code to allow promotion. As such,
+/// it is a non-pure predicate.
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
+ SetVector<Instruction*, SmallVector<Instruction*, 4>,
+ SmallPtrSet<Instruction*, 4> > InstsToRewrite;
+
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) {
+ User *U = *UI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile())
+ return false;
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == AI || SI->isVolatile())
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ continue;
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
+      // If the condition being selected on is a constant, fold the select;
+      // yes, this does (rarely) happen early on.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
+ Value *Result = SI->getOperand(1+CI->isZero());
+ SI->replaceAllUsesWith(Result);
+ SI->eraseFromParent();
+
+        // This is very rare and we just scrambled the use list of AI; start
+ // over completely.
+ return tryToMakeAllocaBePromotable(AI, TD);
+ }
+
+ // If it is safe to turn "load (select c, AI, ptr)" into a select of two
+ // loads, then we can transform this by rewriting the select.
+ if (!isSafeSelectToSpeculate(SI, TD))
+ return false;
+
+ InstsToRewrite.insert(SI);
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ if (PN->use_empty()) { // Dead PHIs can be stripped.
+ InstsToRewrite.insert(PN);
+ continue;
+ }
+
+ // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
+ // in the pred blocks, then we can transform this by rewriting the PHI.
+ if (!isSafePHIToSpeculate(PN, TD))
+ return false;
+
+ InstsToRewrite.insert(PN);
+ continue;
+ }
+
+ return false;
+ }
+
+ // If there are no instructions to rewrite, then all uses are load/stores and
+ // we're done!
+ if (InstsToRewrite.empty())
+ return true;
+
+  // If we have instructions that need to be rewritten for this to be
+  // promotable, take care of that now.
+ for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
+ // Selects in InstsToRewrite only have load uses. Rewrite each as two
+ // loads with a new select.
+ while (!SI->use_empty()) {
+ LoadInst *LI = cast<LoadInst>(SI->use_back());
+
+ IRBuilder<> Builder(LI);
+ LoadInst *TrueLoad =
+ Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
+ LoadInst *FalseLoad =
+          Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
+
+ // Transfer alignment and TBAA info if present.
+ TrueLoad->setAlignment(LI->getAlignment());
+ FalseLoad->setAlignment(LI->getAlignment());
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
+ TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+ FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+ }
+
+ Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
+ V->takeName(LI);
+ LI->replaceAllUsesWith(V);
+ LI->eraseFromParent();
+ }
+
+ // Now that all the loads are gone, the select is gone too.
+ SI->eraseFromParent();
+ continue;
+ }
+
+ // Otherwise, we have a PHI node which allows us to push the loads into the
+ // predecessors.
+ PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
+ if (PN->use_empty()) {
+ PN->eraseFromParent();
+ continue;
+ }
+
+ const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
+ PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN);
+
+    // Get the TBAA tag and alignment to use from one of the loads. It doesn't
+    // matter which one we pick; if any differ, it still doesn't matter.
+ LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
+ MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+ unsigned Align = SomeLoad->getAlignment();
+
+ // Rewrite all loads of the PN to use the new PHI.
+ while (!PN->use_empty()) {
+ LoadInst *LI = cast<LoadInst>(PN->use_back());
+ LI->replaceAllUsesWith(NewPN);
+ LI->eraseFromParent();
+ }
+
+ // Inject loads into all of the pred blocks. Keep track of which blocks we
+ // insert them into in case we have multiple edges from the same block.
+ DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ LoadInst *&Load = InsertedLoads[Pred];
+ if (Load == 0) {
+ Load = new LoadInst(PN->getIncomingValue(i),
+ PN->getName() + "." + Pred->getName(),
+ Pred->getTerminator());
+ Load->setAlignment(Align);
+ if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ }
+
+ NewPN->addIncoming(Load, Pred);
+ }
+
+ PN->eraseFromParent();
+ }
+
+ ++NumAdjusted;
+ return true;
+}
+
bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas;
- DominatorTree &DT = getAnalysis<DominatorTree>();
- DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
+ DominatorTree *DT = 0;
+ if (HasDomTree)
+ DT = &getAnalysis<DominatorTree>();
BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
bool Changed = false;
-
+ SmallVector<Instruction*, 64> Insts;
while (1) {
Allocas.clear();
@@ -795,12 +1141,27 @@ bool SROA::performPromotion(Function &F) {
// the entry node
for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
- if (isAllocaPromotable(AI))
+ if (tryToMakeAllocaBePromotable(AI, TD))
Allocas.push_back(AI);
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT, DF);
+ if (HasDomTree)
+ PromoteMemToReg(Allocas, *DT);
+ else {
+ SSAUpdater SSA;
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+ AllocaInst *AI = Allocas[i];
+
+ // Build list of instructions to promote.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ Insts.push_back(cast<Instruction>(*UI));
+
+ AllocaPromoter(Insts, SSA).run(AI, Insts);
+ Insts.clear();
+ }
+ }
NumPromoted += Allocas.size();
Changed = true;
}
@@ -842,7 +1203,7 @@ bool SROA::performScalarRepl(Function &F) {
while (!WorkList.empty()) {
AllocaInst *AI = WorkList.back();
WorkList.pop_back();
-
+
// Handle dead allocas trivially. These can be formed by SROA'ing arrays
// with unused elements.
if (AI->use_empty()) {
@@ -854,7 +1215,7 @@ bool SROA::performScalarRepl(Function &F) {
// If this alloca is impossible for us to promote, reject it early.
if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
continue;
-
+
// Check to see if this allocation is only modified by a memcpy/memmove from
// a constant global. If this is the case, we can change all users to use
// the constant global instead. This is commonly produced by the CFE by
@@ -871,7 +1232,7 @@ bool SROA::performScalarRepl(Function &F) {
Changed = true;
continue;
}
-
+
// Check to see if we can perform the core SROA transformation. We cannot
// transform the allocation instruction if it is an array allocation
// (allocations OF arrays are ok though), and an allocation of a scalar
@@ -880,10 +1241,10 @@ bool SROA::performScalarRepl(Function &F) {
// Do not promote [0 x %struct].
if (AllocaSize == 0) continue;
-
+
// Do not promote any struct whose size is too big.
if (AllocaSize > SRThreshold) continue;
-
+
// If the alloca looks like a good candidate for scalar replacement, and if
// all its users can be transformed, then split up the aggregate into its
// separate elements.
@@ -906,8 +1267,8 @@ bool SROA::performScalarRepl(Function &F) {
++NumConverted;
Changed = true;
continue;
- }
-
+ }
+
// Otherwise, couldn't process this alloca.
}
@@ -916,14 +1277,14 @@ bool SROA::performScalarRepl(Function &F) {
/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
/// predicate; do SROA now.
-void SROA::DoScalarReplacement(AllocaInst *AI,
+void SROA::DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList) {
DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
SmallVector<AllocaInst*, 32> ElementAllocas;
if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
ElementAllocas.reserve(ST->getNumContainedTypes());
for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
- AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
+ AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
AI->getAlignment(),
AI->getName() + "." + Twine(i), AI);
ElementAllocas.push_back(NA);
@@ -971,48 +1332,106 @@ void SROA::DeleteDeadInstructions() {
I->eraseFromParent();
}
}
-
+
/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
/// performing scalar replacement of alloca AI. The results are flagged in
/// the Info parameter. Offset indicates the position within AI that is
/// referenced by this instruction.
-void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
AllocaInfo &Info) {
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
- isSafeForScalarRepl(BC, AI, Offset, Info);
+ isSafeForScalarRepl(BC, Offset, Info);
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
uint64_t GEPOffset = Offset;
- isSafeGEP(GEPI, AI, GEPOffset, Info);
+ isSafeGEP(GEPI, GEPOffset, Info);
if (!Info.isUnsafe)
- isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
+ isSafeForScalarRepl(GEPI, GEPOffset, Info);
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
- if (Length)
- isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
- UI.getOperandNo() == 0, Info);
- else
- MarkUnsafe(Info);
+ if (Length == 0)
+ return MarkUnsafe(Info, User);
+ isSafeMemAccess(Offset, Length->getZExtValue(), 0,
+ UI.getOperandNo() == 0, Info, MI,
+ true /*AllowWholeAccess*/);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (LI->isVolatile())
+ return MarkUnsafe(Info, User);
+ const Type *LIType = LI->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info, LI, true /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+      // Store is OK if storing INTO the pointer, not storing the pointer itself.
+ if (SI->isVolatile() || SI->getOperand(0) == I)
+ return MarkUnsafe(Info, User);
+
+ const Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info, SI, true /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+ } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+ isSafePHISelectUseForScalarRepl(User, Offset, Info);
+ } else {
+ return MarkUnsafe(Info, User);
+ }
+ if (Info.isUnsafe) return;
+ }
+}
+
+
+/// isSafePHISelectUseForScalarRepl - If we see a PHI node or select using a
+/// pointer derived from the alloca, we can often still split the alloca into
+/// elements.
+/// This is useful if we have a large alloca where one element is phi'd
+/// together somewhere: we can SRoA and promote all the other elements even if
+/// we end up not being able to promote this one.
+///
+/// All we require is that the uses of the PHI do not index into other parts of
+/// the alloca. The most important use case for this is single loads and stores
+/// that are PHI'd together, which can happen due to code sinking.
+void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
+ AllocaInfo &Info) {
+ // If we've already checked this PHI, don't do it again.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ if (!Info.CheckedPHIs.insert(PN))
+ return;
+
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ isSafePHISelectUseForScalarRepl(BC, Offset, Info);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ // Only allow "bitcast" GEPs for simplicity. We could generalize this,
+ // but would have to prove that we're staying inside of an element being
+ // promoted.
+ if (!GEPI->hasAllZeroIndices())
+ return MarkUnsafe(Info, User);
+ isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- if (!LI->isVolatile()) {
- const Type *LIType = LI->getType();
- isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType),
- LIType, false, Info);
- } else
- MarkUnsafe(Info);
+ if (LI->isVolatile())
+ return MarkUnsafe(Info, User);
+ const Type *LIType = LI->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info, LI, false /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
- if (!SI->isVolatile() && SI->getOperand(0) != I) {
- const Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType),
- SIType, true, Info);
- } else
- MarkUnsafe(Info);
+ if (SI->isVolatile() || SI->getOperand(0) == I)
+ return MarkUnsafe(Info, User);
+
+ const Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info, SI, false /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+ } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+ isSafePHISelectUseForScalarRepl(User, Offset, Info);
} else {
- DEBUG(errs() << " Transformation preventing inst: " << *User << '\n');
- MarkUnsafe(Info);
+ return MarkUnsafe(Info, User);
}
if (Info.isUnsafe) return;
}
@@ -1023,7 +1442,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
/// references, and when the resulting offset corresponds to an element within
/// the alloca type. The results are flagged in the Info parameter. Upon
/// return, Offset is adjusted as specified by the GEP indices.
-void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
+void SROA::isSafeGEP(GetElementPtrInst *GEPI,
uint64_t &Offset, AllocaInfo &Info) {
gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
if (GEPIt == E)
@@ -1038,7 +1457,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
if (!IdxVal)
- return MarkUnsafe(Info);
+ return MarkUnsafe(Info, GEPI);
}
// Compute the offset due to this GEP and check if the alloca has a
@@ -1046,40 +1465,92 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
&Indices[0], Indices.size());
- if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0))
- MarkUnsafe(Info);
+ if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0))
+ MarkUnsafe(Info, GEPI);
+}
+
+/// isHomogeneousAggregate - Check if type T is a struct or array containing
+/// elements of the same type (which is always true for arrays). If so,
+/// return true with NumElts and EltTy set to the number of elements and the
+/// element type, respectively.
+static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts,
+ const Type *&EltTy) {
+ if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ NumElts = AT->getNumElements();
+ EltTy = (NumElts == 0 ? 0 : AT->getElementType());
+ return true;
+ }
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ NumElts = ST->getNumContainedTypes();
+ EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
+ for (unsigned n = 1; n < NumElts; ++n) {
+ if (ST->getContainedType(n) != EltTy)
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
+/// "homogeneous" aggregates with the same element type and number of elements.
+static bool isCompatibleAggregate(const Type *T1, const Type *T2) {
+ if (T1 == T2)
+ return true;
+
+ unsigned NumElts1, NumElts2;
+ const Type *EltTy1, *EltTy2;
+ if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
+ isHomogeneousAggregate(T2, NumElts2, EltTy2) &&
+ NumElts1 == NumElts2 &&
+ EltTy1 == EltTy2)
+ return true;
+
+ return false;
}
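
Concretely, [4 x float], {float, float, float, float}, and each other count as
compatible here, while {float, i32} is not homogeneous and only matches
itself. A toy stand-in for the two checks (plain C++, illustrative only, with
aggregate types reduced to ordered lists of element-type names):

    #include <string>
    #include <vector>

    struct ToyAggregate {
      std::vector<std::string> EltTys;   // e.g. {"float", "float", "float"}
    };

    static bool isHomogeneous(const ToyAggregate &T, unsigned &NumElts,
                              std::string &EltTy) {
      NumElts = T.EltTys.size();
      EltTy = NumElts ? T.EltTys[0] : "";
      for (unsigned i = 1; i < NumElts; ++i)
        if (T.EltTys[i] != EltTy)
          return false;                  // e.g. {"float", "i32"}
      return true;
    }

    static bool isCompatible(const ToyAggregate &A, const ToyAggregate &B) {
      unsigned NA, NB;
      std::string EA, EB;
      return isHomogeneous(A, NA, EA) && isHomogeneous(B, NB, EB) &&
             NA == NB && EA == EB;
    }
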
/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
/// alloca or has an offset and size that corresponds to a component element
/// within it. The offset checked here may have been formed from a GEP with a
/// pointer bitcasted to a different type.
-void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
+///
+/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
+/// unit. If false, it only allows accesses known to be in a single element.
+void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
const Type *MemOpType, bool isStore,
- AllocaInfo &Info) {
+ AllocaInfo &Info, Instruction *TheAccess,
+ bool AllowWholeAccess) {
// Check if this is a load/store of the entire alloca.
- if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) {
- bool UsesAggregateType = (MemOpType == AI->getAllocatedType());
- // This is safe for MemIntrinsics (where MemOpType is 0), integer types
- // (which are essentially the same as the MemIntrinsics, especially with
- // regard to copying padding between elements), or references using the
- // aggregate type of the alloca.
- if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) {
- if (!UsesAggregateType) {
- if (isStore)
- Info.isMemCpyDst = true;
- else
- Info.isMemCpySrc = true;
- }
+ if (Offset == 0 && AllowWholeAccess &&
+ MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) {
+ // This can be safe for MemIntrinsics (where MemOpType is 0) and integer
+ // loads/stores (which are essentially the same as the MemIntrinsics with
+ // regard to copying padding between elements). But, if an alloca is
+ // flagged as both a source and destination of such operations, we'll need
+ // to check later for padding between elements.
+ if (!MemOpType || MemOpType->isIntegerTy()) {
+ if (isStore)
+ Info.isMemCpyDst = true;
+ else
+ Info.isMemCpySrc = true;
+ return;
+ }
+ // This is also safe for references using a type that is compatible with
+ // the type of the alloca, so that loads/stores can be rewritten using
+ // insertvalue/extractvalue.
+ if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) {
+ Info.hasSubelementAccess = true;
return;
}
}
// Check if the offset/size correspond to a component within the alloca type.
- const Type *T = AI->getAllocatedType();
- if (TypeHasComponent(T, Offset, MemSize))
+ const Type *T = Info.AI->getAllocatedType();
+ if (TypeHasComponent(T, Offset, MemSize)) {
+ Info.hasSubelementAccess = true;
return;
+ }
- return MarkUnsafe(Info);
+ return MarkUnsafe(Info, TheAccess);
}
/// TypeHasComponent - Return true if T has a component type with the
@@ -1116,14 +1587,21 @@ bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
/// instruction.
void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts) {
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI++);
if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
RewriteBitCast(BC, AI, Offset, NewElts);
- } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ continue;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
RewriteGEP(GEPI, AI, Offset, NewElts);
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ continue;
+ }
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
uint64_t MemSize = Length->getZExtValue();
if (Offset == 0 &&
@@ -1131,9 +1609,13 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
// Otherwise the intrinsic can only touch a single element and the
// address operand will be updated, so nothing else needs to be done.
- } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ continue;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
const Type *LIType = LI->getType();
- if (LIType == AI->getAllocatedType()) {
+
+ if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
// Replace:
// %res = load { i32, i32 }* %alloc
// with:
@@ -1155,10 +1637,13 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
// If this is a load of the entire alloca to an integer, rewrite it.
RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
}
- } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
Value *Val = SI->getOperand(0);
const Type *SIType = Val->getType();
- if (SIType == AI->getAllocatedType()) {
+ if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
// Replace:
// store { i32, i32 } %val, { i32, i32 }* %alloc
// with:
@@ -1178,6 +1663,26 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
// If this is a store of the entire alloca from an integer, rewrite it.
RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
}
+ continue;
+ }
+
+ if (isa<SelectInst>(User) || isa<PHINode>(User)) {
+ // If we have a PHI user of the alloca itself (as opposed to a GEP or
+      // bitcast), we have to rewrite it. GEP and bitcast uses will be RAUW'd to
+ // the new pointer.
+ if (!isa<AllocaInst>(I)) continue;
+
+ assert(Offset == 0 && NewElts[0] &&
+ "Direct alloca use should have a zero offset");
+
+ // If we have a use of the alloca, we know the derived uses will be
+ // utilizing just the first element of the scalarized result. Insert a
+ // bitcast of the first alloca before the user as required.
+ AllocaInst *NewAI = NewElts[0];
+ BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI);
+ NewAI->moveBefore(BCI);
+ TheUse = BCI;
+ continue;
}
}
}
@@ -1305,7 +1810,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// function is only called for mem intrinsics that access the whole
// aggregate, so non-zero GEPs are not an issue here.)
OtherPtr = OtherPtr->stripPointerCasts();
-
+
// Copying the alloca to itself is a no-op: just delete it.
if (OtherPtr == AI || OtherPtr == NewElts[0]) {
// This code will run twice for a no-op memcpy -- once for each operand.
@@ -1316,28 +1821,26 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
DeadInsts.push_back(MI);
return;
}
-
+
// If the pointer is not the right type, insert a bitcast to the right
// type.
const Type *NewTy =
PointerType::get(AI->getType()->getElementType(), AddrSpace);
-
+
if (OtherPtr->getType() != NewTy)
OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
}
-
+
// Process each element of the aggregate.
- Value *TheFn = MI->getCalledValue();
- const Type *BytePtrTy = MI->getRawDest()->getType();
bool SROADest = MI->getRawDest() == Inst;
-
+
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// If this is a memcpy/memmove, emit a GEP of the other element address.
Value *OtherElt = 0;
unsigned OtherEltAlign = MemAlignment;
-
+
if (OtherPtr) {
Value *Idx[2] = { Zero,
ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
@@ -1353,7 +1856,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
EltOffset = TD->getTypeAllocSize(EltTy)*i;
}
-
+
// The alignment of the other pointer is the guaranteed alignment of the
// element, which is affected by both the known alignment of the whole
// mem intrinsic and the alignment of the element. If the alignment of
@@ -1361,10 +1864,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// known alignment is just 4 bytes.
OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
}
-
+
Value *EltPtr = NewElts[i];
const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
-
+
// If we got down to a scalar, insert a load or store as appropriate.
if (EltTy->isSingleValueType()) {
if (isa<MemTransferInst>(MI)) {
@@ -1380,7 +1883,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
continue;
}
assert(isa<MemSetInst>(MI));
-
+
// If the stored element is zero (common case), just store a null
// constant.
Constant *StoreVal;
@@ -1400,7 +1903,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
TotalVal = TotalVal.shl(8);
TotalVal |= OneVal;
}
-
+
// Convert the integer value to the appropriate type.
StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
if (ValTy->isPointerTy())
@@ -1408,12 +1911,12 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
else if (ValTy->isFloatingPointTy())
StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
assert(StoreVal->getType() == ValTy && "Type mismatch!");
-
+
// If the requested value was a vector constant, create it.
if (EltTy != ValTy) {
unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
- StoreVal = ConstantVector::get(&Elts[0], NumElts);
+ StoreVal = ConstantVector::get(Elts);
}
}
new StoreInst(StoreVal, EltPtr, MI);
@@ -1422,55 +1925,24 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// Otherwise, if we're storing a byte variable, use a memset call for
// this element.
}
-
- // Cast the element pointer to BytePtrTy.
- if (EltPtr->getType() != BytePtrTy)
- EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
-
- // Cast the other pointer (if we have one) to BytePtrTy.
- if (OtherElt && OtherElt->getType() != BytePtrTy) {
- // Preserve address space of OtherElt
- const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType());
- const PointerType* PTy = cast<PointerType>(BytePtrTy);
- if (OtherPTy->getElementType() != PTy->getElementType()) {
- Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
- OtherPTy->getAddressSpace());
- OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
- OtherElt->getNameStr(), MI);
- }
- }
-
+
unsigned EltSize = TD->getTypeAllocSize(EltTy);
-
+
+ IRBuilder<> Builder(MI);
+
// Finally, insert the meminst for this element.
- if (isa<MemTransferInst>(MI)) {
- Value *Ops[] = {
- SROADest ? EltPtr : OtherElt, // Dest ptr
- SROADest ? OtherElt : EltPtr, // Src ptr
- ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
- // Align
- ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
- MI->getVolatileCst()
- };
- // In case we fold the address space overloaded memcpy of A to B
- // with memcpy of B to C, change the function to be a memcpy of A to C.
- const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(),
- Ops[2]->getType() };
- Module *M = MI->getParent()->getParent()->getParent();
- TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3);
- CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
+ if (isa<MemSetInst>(MI)) {
+ Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
+ MI->isVolatile());
} else {
- assert(isa<MemSetInst>(MI));
- Value *Ops[] = {
- EltPtr, MI->getArgOperand(1), // Dest, Value,
- ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
- Zero, // Align
- ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
- };
- const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
- Module *M = MI->getParent()->getParent()->getParent();
- TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
- CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
+ assert(isa<MemTransferInst>(MI));
+ Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr
+ Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr
+
+ if (isa<MemCpyInst>(MI))
+ Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
+ else
+ Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
}
}
DeadInsts.push_back(MI);
@@ -1486,12 +1958,13 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
Value *SrcVal = SI->getOperand(0);
const Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+
+ IRBuilder<> Builder(SI);
// Handle tail padding by extending the operand
if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
- SrcVal = new ZExtInst(SrcVal,
- IntegerType::get(SI->getContext(), AllocaSizeBits),
- "", SI);
+ SrcVal = Builder.CreateZExt(SrcVal,
+ IntegerType::get(SI->getContext(), AllocaSizeBits));
DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
<< '\n');
@@ -1500,47 +1973,44 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// have different ways to compute the element offset.
if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
const StructLayout *Layout = TD->getStructLayout(EltSTy);
-
+
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Get the number of bits to shift SrcVal to get the value.
const Type *FieldTy = EltSTy->getElementType(i);
uint64_t Shift = Layout->getElementOffsetInBits(i);
-
+
if (TD->isBigEndian())
Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
-
+
Value *EltVal = SrcVal;
if (Shift) {
Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
- EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
- "sroa.store.elt", SI);
+ EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
}
-
+
// Truncate down to an integer of the right size.
uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
-
+
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
-
+
if (FieldSizeBits != AllocaSizeBits)
- EltVal = new TruncInst(EltVal,
- IntegerType::get(SI->getContext(), FieldSizeBits),
- "", SI);
+ EltVal = Builder.CreateTrunc(EltVal,
+ IntegerType::get(SI->getContext(), FieldSizeBits));
Value *DestField = NewElts[i];
if (EltVal->getType() == FieldTy) {
// Storing to an integer field of this size, just do it.
} else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
// Bitcast to the right element type (for fp/vector values).
- EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
+ EltVal = Builder.CreateBitCast(EltVal, FieldTy);
} else {
// Otherwise, bitcast the dest pointer (for aggregates).
- DestField = new BitCastInst(DestField,
- PointerType::getUnqual(EltVal->getType()),
- "", SI);
+ DestField = Builder.CreateBitCast(DestField,
+ PointerType::getUnqual(EltVal->getType()));
}
new StoreInst(EltVal, DestField, SI);
}
-
+
} else {
const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
const Type *ArrayEltTy = ATy->getElementType();
@@ -1548,50 +2018,48 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
uint64_t Shift;
-
+
if (TD->isBigEndian())
Shift = AllocaSizeBits-ElementOffset;
- else
+ else
Shift = 0;
-
+
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Ignore zero sized fields like {}, they obviously contain no data.
if (ElementSizeBits == 0) continue;
-
+
Value *EltVal = SrcVal;
if (Shift) {
Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
- EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
- "sroa.store.elt", SI);
+ EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
}
-
+
// Truncate down to an integer of the right size.
if (ElementSizeBits != AllocaSizeBits)
- EltVal = new TruncInst(EltVal,
- IntegerType::get(SI->getContext(),
- ElementSizeBits),"",SI);
+ EltVal = Builder.CreateTrunc(EltVal,
+ IntegerType::get(SI->getContext(),
+ ElementSizeBits));
Value *DestField = NewElts[i];
if (EltVal->getType() == ArrayEltTy) {
// Storing to an integer field of this size, just do it.
} else if (ArrayEltTy->isFloatingPointTy() ||
ArrayEltTy->isVectorTy()) {
// Bitcast to the right element type (for fp/vector values).
- EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
+ EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy);
} else {
// Otherwise, bitcast the dest pointer (for aggregates).
- DestField = new BitCastInst(DestField,
- PointerType::getUnqual(EltVal->getType()),
- "", SI);
+ DestField = Builder.CreateBitCast(DestField,
+ PointerType::getUnqual(EltVal->getType()));
}
new StoreInst(EltVal, DestField, SI);
-
+
if (TD->isBigEndian())
Shift -= ElementOffset;
- else
+ else
Shift += ElementOffset;
}
}
-
+
DeadInsts.push_back(SI);
}
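
The lshr/trunc sequence above reduces to plain shift arithmetic. For an i32
stored over an alloca of {i16, i16} (a little-endian sketch; field i lives at
bit offset 16*i):

    #include <cstdint>

    void splitStore(uint32_t SrcVal, uint16_t &Field0, uint16_t &Field1) {
      Field0 = static_cast<uint16_t>(SrcVal >> 0);    // lshr 0,  trunc to i16
      Field1 = static_cast<uint16_t>(SrcVal >> 16);   // lshr 16, trunc to i16
    }

    // On a big-endian target the shifts are mirrored, per the code above:
    // Shift = AllocaSizeBits - FieldOffset - FieldSizeBits, i.e. 16 and 0 here.
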
@@ -1603,10 +2071,10 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// and form the result value.
const Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
-
+
DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
<< '\n');
-
+
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
const StructLayout *Layout = 0;
@@ -1616,11 +2084,11 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
} else {
const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
- }
-
- Value *ResultVal =
+ }
+
+ Value *ResultVal =
Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
-
+
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Load the value from the alloca. If the NewElt is an aggregate, cast
// the pointer to an integer of the same size before doing the load.
@@ -1628,11 +2096,11 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
const Type *FieldTy =
cast<PointerType>(SrcField->getType())->getElementType();
uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
-
+
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
-
- const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
+
+ const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
FieldSizeBits);
if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
!FieldTy->isVectorTy())
@@ -1650,17 +2118,17 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// we can shift and insert it.
if (SrcField->getType() != ResultVal->getType())
SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
-
+
// Determine the number of bits to shift SrcField.
uint64_t Shift;
if (Layout) // Struct case.
Shift = Layout->getElementOffsetInBits(i);
else // Array case.
Shift = i*ArrayEltBitOffset;
-
+
if (TD->isBigEndian())
Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
-
+
if (Shift) {
Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
@@ -1683,46 +2151,39 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
}
/// HasPadding - Return true if the specified type has any structure or
-/// alignment padding, false otherwise.
+/// alignment padding in between the elements that would be split apart
+/// by SROA; return false otherwise.
static bool HasPadding(const Type *Ty, const TargetData &TD) {
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
- return HasPadding(ATy->getElementType(), TD);
-
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
- return HasPadding(VTy->getElementType(), TD);
-
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = TD.getStructLayout(STy);
- unsigned PrevFieldBitOffset = 0;
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
-
- // Padding in sub-elements?
- if (HasPadding(STy->getElementType(i), TD))
- return true;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Ty = ATy->getElementType();
+ return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+ }
- // Check to see if there is any padding between this element and the
- // previous one.
- if (i) {
- unsigned PrevFieldEnd =
+ // SROA currently handles only Arrays and Structs.
+ const StructType *STy = cast<StructType>(Ty);
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned PrevFieldBitOffset = 0;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
+
+ // Check to see if there is any padding between this element and the
+ // previous one.
+ if (i) {
+ unsigned PrevFieldEnd =
PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
- if (PrevFieldEnd < FieldBitOffset)
- return true;
- }
-
- PrevFieldBitOffset = FieldBitOffset;
- }
-
- // Check for tail padding.
- if (unsigned EltCount = STy->getNumElements()) {
- unsigned PrevFieldEnd = PrevFieldBitOffset +
- TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
- if (PrevFieldEnd < SL->getSizeInBits())
+ if (PrevFieldEnd < FieldBitOffset)
return true;
}
+ PrevFieldBitOffset = FieldBitOffset;
}
-
- return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+ // Check for tail padding.
+ if (unsigned EltCount = STy->getNumElements()) {
+ unsigned PrevFieldEnd = PrevFieldBitOffset +
+ TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
+ if (PrevFieldEnd < SL->getSizeInBits())
+ return true;
+ }
+ return false;
}
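
A worked instance of the struct walk above: for {i8, i32} laid out in the
usual way, the i8 ends at bit 8 but the i32 starts at bit 32, so the
inter-field check fires (layout values are the typical ones, assumed here):

    #include <cassert>

    void hasPaddingExample() {
      unsigned PrevFieldEnd   = 0 + 8;        // i8 at bit offset 0, 8 bits wide
      unsigned FieldBitOffset = 32;           // i32 aligned up to bit 32
      assert(PrevFieldEnd < FieldBitOffset);  // 24 bits of padding -> true
      // Tail check: the i32 ends at bit 64, equal to the struct size in bits,
      // so there is no tail padding in this example.
    }
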
/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of
@@ -1731,14 +2192,14 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) {
bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
// Loop over the use list of the alloca. We can only transform it if all of
// the users are safe to transform.
- AllocaInfo Info;
-
- isSafeForScalarRepl(AI, AI, 0, Info);
+ AllocaInfo Info(AI);
+
+ isSafeForScalarRepl(AI, 0, Info);
if (Info.isUnsafe) {
DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
return false;
}
-
+
// Okay, we know all the users are promotable. If the aggregate is a memcpy
// source and destination, we have to be careful. In particular, the memcpy
// could be moving around elements that live in structure padding of the LLVM
@@ -1748,6 +2209,20 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
HasPadding(AI->getAllocatedType(), *TD))
return false;
+ // If the alloca never has an access to just *part* of it, but is accessed
+ // via loads and stores, then we should use ConvertToScalarInfo to promote
+ // the alloca instead of promoting each piece at a time and inserting fission
+ // and fusion code.
+ if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
+ // If the struct/array just has one element, use basic SRoA.
+ if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ if (ST->getNumElements() > 1) return false;
+ } else {
+ if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
+ return false;
+ }
+ }
+
return true;
}
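
The new check above declines per-element SROA when the aggregate is only ever
moved as a unit; a source-level sketch of the case it is aimed at
(hypothetical illustration):

    struct Pair { int a, b; };

    Pair copyWhole(const Pair &In) {
      Pair Local = In;   // whole-aggregate store into the alloca
      return Local;      // whole-aggregate load out of it
    }

    // Splitting Local into two i32 allocas would force "fission" of every
    // whole store and "fusion" before every whole load; ConvertToScalarInfo
    // keeps it as one integer-sized value instead.
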
@@ -1760,7 +2235,7 @@ static bool PointsToConstantGlobal(Value *V) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return GV->isConstant();
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::BitCast ||
+ if (CE->getOpcode() == Instruction::BitCast ||
CE->getOpcode() == Instruction::GetElementPtr)
return PointsToConstantGlobal(CE->getOperand(0));
return false;
@@ -1771,18 +2246,19 @@ static bool PointsToConstantGlobal(Value *V) {
/// see any stores or other unknown uses. If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with isOffset) but otherwise traverse
/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
-/// the alloca, and if the source pointer is a pointer to a constant global, we
+/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
bool isOffset) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
User *U = cast<Instruction>(*UI);
- if (LoadInst *LI = dyn_cast<LoadInst>(U))
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
// Ignore non-volatile loads, they are always ok.
- if (!LI->isVolatile())
- continue;
-
+ if (LI->isVolatile()) return false;
+ continue;
+ }
+
if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
// If uses of the bitcast are ok, we are ok.
if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
@@ -1797,27 +2273,52 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
return false;
continue;
}
-
+
+ if (CallSite CS = U) {
+ // If this is a readonly/readnone call site, then we know it is just a
+ // load and we can ignore it.
+ if (CS.onlyReadsMemory())
+ continue;
+
+    // If this is the function being called, then we treat it like a load and
+ // ignore it.
+ if (CS.isCallee(UI))
+ continue;
+
+ // If this is being passed as a byval argument, the caller is making a
+ // copy, so it is only a read of the alloca.
+ unsigned ArgNo = CS.getArgumentNo(UI);
+ if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
+ continue;
+ }
+
    // If this isn't our memcpy/memmove, reject it as something we can't
// handle.
MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
if (MI == 0)
return false;
+  // If the transfer is using the alloca as its source, then we can ignore
+  // it since it is a load (unless the transfer is volatile).
+ if (UI.getOperandNo() == 1) {
+ if (MI->isVolatile()) return false;
+ continue;
+ }
+
// If we already have seen a copy, reject the second one.
if (TheCopy) return false;
-
+
// If the pointer has been offset from the start of the alloca, we can't
// safely handle this.
if (isOffset) return false;
// If the memintrinsic isn't using the alloca as the dest, reject it.
if (UI.getOperandNo() != 0) return false;
-
+
// If the source of the memcpy/move is not a constant global, reject it.
if (!PointsToConstantGlobal(MI->getSource()))
return false;
-
+
// Otherwise, the transform is safe. Remember the copy instruction.
TheCopy = MI;
}
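
The pattern this analysis recognizes, at the source level (illustrative only):

    #include <cstring>

    static const int Table[4] = {1, 2, 3, 5};

    int lookup(int i) {
      int Local[4];
      std::memcpy(Local, Table, sizeof(Local));  // the one copy, TheCopy
      return Local[i & 3];                       // all other uses are reads
    }

    // Because Local is initialized once from a constant global and never
    // written again, the copy can be deleted and reads forwarded to Table.
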
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 360749c..ce5dd73 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -42,7 +42,9 @@ STATISTIC(NumSimpl, "Number of blocks simplified");
namespace {
struct CFGSimplifyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGSimplifyPass() : FunctionPass(ID) {}
+ CFGSimplifyPass() : FunctionPass(ID) {
+ initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
};
@@ -50,7 +52,7 @@ namespace {
char CFGSimplifyPass::ID = 0;
INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg",
- "Simplify the CFG", false, false);
+ "Simplify the CFG", false, false)
// Public interface to the CFGSimplification pass
FunctionPass *llvm::createCFGSimplificationPass() {
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
index 3ec70ec..70ff32e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -32,7 +32,9 @@ namespace {
const TargetData *TD;
public:
static char ID; // Pass identification
- SimplifyHalfPowrLibCalls() : FunctionPass(ID) {}
+ SimplifyHalfPowrLibCalls() : FunctionPass(ID) {
+ initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
@@ -47,7 +49,7 @@ namespace {
} // end anonymous namespace.
INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr",
- "Simplify half_powr library calls", false, false);
+ "Simplify half_powr library calls", false, false)
// Public interface to the Simplify HalfPowr LibCalls pass.
FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
@@ -95,7 +97,8 @@ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
InlineFunctionInfo IFI(0, TD);
bool B = InlineFunction(Call, IFI);
- assert(B && "half_powr didn't inline?"); B=B;
+ assert(B && "half_powr didn't inline?");
+ (void)B;
BasicBlock *NewBody = NewBlock->getSinglePredecessor();
assert(NewBody);
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index d7ce53f..ec45b71 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -123,7 +123,7 @@ struct StrCatOpt : public LibCallOptimization {
// Verify the "strcat" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
FT->getParamType(1) != FT->getReturnType())
return 0;
@@ -160,9 +160,8 @@ struct StrCatOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
    // concatenation for us. Copy Len+1 bytes so the nul terminator comes too;
    // align = 1.
- EmitMemCpy(CpyDst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len+1),
- 1, false, B, TD);
+ B.CreateMemCpy(CpyDst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
}
};
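
With both lengths known, strcat reduces to one memcpy of Len+1 bytes, carrying
the nul terminator along. A host-side model (assuming the optimizer has
already proved DstLen = strlen(Dst) and Len = strlen(Src) as constants):

    #include <cstring>

    char *strcatFolded(char *Dst, const char *Src,
                       size_t DstLen, size_t Len) {
      std::memcpy(Dst + DstLen, Src, Len + 1);   // +1 copies the nul byte
      return Dst;                                // strcat returns Dst
    }
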
@@ -174,7 +173,7 @@ struct StrNCatOpt : public StrCatOpt {
// Verify the "strncat" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
- FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
FT->getParamType(1) != FT->getReturnType() ||
!FT->getParamType(2)->isIntegerTy())
@@ -222,8 +221,9 @@ struct StrChrOpt : public LibCallOptimization {
// Verify the "strchr" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
- FT->getParamType(0) != FT->getReturnType())
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
return 0;
Value *SrcStr = CI->getArgOperand(0);
@@ -252,22 +252,55 @@ struct StrChrOpt : public LibCallOptimization {
// strchr can find the nul character.
Str += '\0';
- char CharValue = CharC->getSExtValue();
// Compute the offset.
- uint64_t i = 0;
- while (1) {
- if (i == Str.size()) // Didn't find the char. strchr returns null.
- return Constant::getNullValue(CI->getType());
- // Did we find our match?
- if (Str[i] == CharValue)
- break;
- ++i;
- }
+ size_t I = Str.find(CharC->getSExtValue());
+ if (I == std::string::npos) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
// strchr(s+n,c) -> gep(s+n+i,c)
- Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), i);
- return B.CreateGEP(SrcStr, Idx, "strchr");
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+ }
+};
+
+//===---------------------------------------===//
+// 'strrchr' Optimizations
+
+struct StrRChrOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strrchr" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+
+ // Cannot fold anything if we're not looking for a constant.
+ if (!CharC)
+ return 0;
+
+ std::string Str;
+ if (!GetConstantStringInfo(SrcStr, Str)) {
+ // strrchr(s, 0) -> strchr(s, 0)
+ if (TD && CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, TD);
+ return 0;
+ }
+
+ // strrchr can find the nul character.
+ Str += '\0';
+
+ // Compute the offset.
+ size_t I = Str.rfind(CharC->getSExtValue());
+ if (I == std::string::npos) // Didn't find the char. Return null.
+ return Constant::getNullValue(CI->getType());
+
+ // strrchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
}
};
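
The StrChrOpt hunk above swaps a hand-rolled scan loop for std::string::find,
and the new StrRChrOpt mirrors it with rfind. A sketch of the shared offset
computation, assuming the searched string is already known to be constant
(foldStrChrOffset is a hypothetical name, not from the patch):

    #include <cstddef>
    #include <string>

    // Offset strchr/strrchr would return into Str, or std::string::npos
    // when the call folds to a null pointer.
    static size_t foldStrChrOffset(std::string Str, char C, bool Reverse) {
      Str += '\0'; // Both functions may match the terminating nul.
      return Reverse ? Str.rfind(C) : Str.find(C);
    }

The npos case corresponds to the Constant::getNullValue result; any other
offset becomes the operand of the GEP built with B.getInt64(I).
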
@@ -281,7 +314,7 @@ struct StrCmpOpt : public LibCallOptimization {
if (FT->getNumParams() != 2 ||
!FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context))
+ FT->getParamType(0) != B.getInt8PtrTy())
return 0;
Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
@@ -329,7 +362,7 @@ struct StrNCmpOpt : public LibCallOptimization {
if (FT->getNumParams() != 3 ||
!FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getParamType(2)->isIntegerTy())
return 0;
@@ -384,7 +417,7 @@ struct StrCpyOpt : public LibCallOptimization {
if (FT->getNumParams() != NumParams ||
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context))
+ FT->getParamType(0) != B.getInt8PtrTy())
return 0;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
@@ -405,9 +438,8 @@ struct StrCpyOpt : public LibCallOptimization {
ConstantInt::get(TD->getIntPtrType(*Context), Len),
CI->getArgOperand(2), B, TD);
else
- EmitMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- 1, false, B, TD);
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
return Dst;
}
};
@@ -420,7 +452,7 @@ struct StrNCpyOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getParamType(2)->isIntegerTy())
return 0;
@@ -435,8 +467,7 @@ struct StrNCpyOpt : public LibCallOptimization {
if (SrcLen == 0) {
// strncpy(x, "", y) -> memset(x, '\0', y, 1)
- EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'),
- LenOp, false, B, TD);
+ B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
return Dst;
}
@@ -455,9 +486,8 @@ struct StrNCpyOpt : public LibCallOptimization {
if (Len > SrcLen+1) return 0;
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
- EmitMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- 1, false, B, TD);
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
return Dst;
}
@@ -470,7 +500,7 @@ struct StrLenOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getReturnType()->isIntegerTy())
return 0;
@@ -488,6 +518,45 @@ struct StrLenOpt : public LibCallOptimization {
}
};
+
+//===---------------------------------------===//
+// 'strpbrk' Optimizations
+
+struct StrPBrkOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ FT->getReturnType() != FT->getParamType(0))
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strpbrk(s, "") -> NULL
+ // strpbrk("", s) -> NULL
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t I = S1.find_first_of(S2);
+ if (I == std::string::npos) // No match.
+ return Constant::getNullValue(CI->getType());
+
+ return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+ }
+
+ // strpbrk(s, "a") -> strchr(s, 'a')
+ if (TD && HasS2 && S2.size() == 1)
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD);
+
+ return 0;
+ }
+};
+
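
The StrPBrkOpt handler just added maps directly onto std::string operations;
a minimal sketch of its folds (foldStrPBrk is a hypothetical name):

    #include <cstddef>
    #include <string>

    // Offset of the first byte of S1 that also occurs in S2, or npos when
    // strpbrk(S1, S2) folds to a null pointer.
    static size_t foldStrPBrk(const std::string &S1, const std::string &S2) {
      if (S1.empty() || S2.empty())
        return std::string::npos; // strpbrk(s, "") / strpbrk("", s) -> NULL
      return S1.find_first_of(S2);
    }
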
//===---------------------------------------===//
// 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc.
@@ -501,7 +570,8 @@ struct StrToOpt : public LibCallOptimization {
Value *EndPtr = CI->getArgOperand(1);
if (isa<ConstantPointerNull>(EndPtr)) {
- CI->setOnlyReadsMemory();
+ // With a null EndPtr, this function won't capture the main argument.
+ // It would be readonly too, except that it still may write to errno.
CI->addAttribute(1, Attribute::NoCapture);
}
@@ -510,6 +580,67 @@ struct StrToOpt : public LibCallOptimization {
};
//===---------------------------------------===//
+// 'strspn' Optimizations
+
+struct StrSpnOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strspn(s, "") -> 0
+ // strspn("", s) -> 0
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2)
+ return ConstantInt::get(CI->getType(), strspn(S1.c_str(), S2.c_str()));
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strcspn' Optimizations
+
+struct StrCSpnOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strcspn("", s) -> 0
+ if (HasS1 && S1.empty())
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2)
+ return ConstantInt::get(CI->getType(), strcspn(S1.c_str(), S2.c_str()));
+
+ // strcspn(s, "") -> strlen(s)
+ if (TD && HasS2 && S2.empty())
+ return EmitStrLen(CI->getArgOperand(0), B, TD);
+
+ return 0;
+ }
+};
+
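
StrSpnOpt and StrCSpnOpt above fold the two-constant case by evaluating the
host's own libc routine on the known strings, which yields exactly the integer
the program would compute at run time. A sketch under that assumption (foldSpan
is a hypothetical name):

    #include <cstring>
    #include <string>

    // Compile-time evaluation of strspn/strcspn for two known strings.
    static size_t foldSpan(const std::string &S1, const std::string &S2,
                           bool Complement) {
      return Complement ? std::strcspn(S1.c_str(), S2.c_str())
                        : std::strspn(S1.c_str(), S2.c_str());
    }

Note the asymmetry in the handlers: strspn(s, "") is 0, but strcspn(s, "") is
strlen(s), which is why only the latter needs TD to emit a strlen call.
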
+//===---------------------------------------===//
// 'strstr' Optimizations
struct StrStrOpt : public LibCallOptimization {
@@ -637,8 +768,8 @@ struct MemCpyOpt : public LibCallOptimization {
return 0;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1, false, B, TD);
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
};
@@ -659,8 +790,8 @@ struct MemMoveOpt : public LibCallOptimization {
return 0;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
- EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1, false, B, TD);
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
};
@@ -681,9 +812,8 @@ struct MemSetOpt : public LibCallOptimization {
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
- Value *Val = B.CreateIntCast(CI->getArgOperand(1),
- Type::getInt8Ty(*Context), false);
- EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD);
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
};
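
All three mem* handlers above now route through IRBuilder's memory-intrinsic
helpers instead of the hand-rolled Emit* wrappers (which this patch deletes
from BuildLibCalls.cpp further down). A condensed sketch of the memcpy case,
assuming this tree's IRBuilder API (lowerMemCpy is a hypothetical name):

    #include "llvm/Instructions.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // memcpy(dst, src, n) -> llvm.memcpy(dst, src, n, align 1); the libcall
    // returns its first argument, so the handler does too.
    static Value *lowerMemCpy(CallInst *CI, IRBuilder<> &B) {
      B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2), /*Align=*/1);
      return CI->getArgOperand(0);
    }
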
@@ -765,12 +895,10 @@ struct Exp2Opt : public LibCallOptimization {
Value *LdExpArg = 0;
if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0),
- Type::getInt32Ty(*Context), "tmp");
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
} else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0),
- Type::getInt32Ty(*Context), "tmp");
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
}
if (LdExpArg) {
@@ -789,7 +917,7 @@ struct Exp2Opt : public LibCallOptimization {
Module *M = Caller->getParent();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
Op->getType(),
- Type::getInt32Ty(*Context),NULL);
+ B.getInt32Ty(), NULL);
CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -819,7 +947,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
Value *V = Cast->getOperand(0);
V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B,
Callee->getAttributes());
- return B.CreateFPExt(V, Type::getDoubleTy(*Context));
+ return B.CreateFPExt(V, B.getDoubleTy());
}
};
@@ -846,8 +974,8 @@ struct FFSOpt : public LibCallOptimization {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
if (CI->getValue() == 0) // ffs(0) -> 0.
return Constant::getNullValue(CI->getType());
- return ConstantInt::get(Type::getInt32Ty(*Context), // ffs(c) -> cttz(c)+1
- CI->getValue().countTrailingZeros()+1);
+ // ffs(c) -> cttz(c)+1
+ return B.getInt32(CI->getValue().countTrailingZeros() + 1);
}
// ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
@@ -856,11 +984,10 @@ struct FFSOpt : public LibCallOptimization {
Intrinsic::cttz, &ArgType, 1);
Value *V = B.CreateCall(F, Op, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
- V = B.CreateIntCast(V, Type::getInt32Ty(*Context), false, "tmp");
+ V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
- return B.CreateSelect(Cond, V,
- ConstantInt::get(Type::getInt32Ty(*Context), 0));
+ return B.CreateSelect(Cond, V, B.getInt32(0));
}
};
@@ -877,10 +1004,8 @@ struct IsDigitOpt : public LibCallOptimization {
// isdigit(c) -> (c-'0') <u 10
Value *Op = CI->getArgOperand(0);
- Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),
- "isdigittmp");
- Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),
- "isdigit");
+ Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
+ Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
return B.CreateZExt(Op, CI->getType());
}
};
@@ -898,8 +1023,7 @@ struct IsAsciiOpt : public LibCallOptimization {
// isascii(c) -> c <u 128
Value *Op = CI->getArgOperand(0);
- Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),
- "isascii");
+ Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
return B.CreateZExt(Op, CI->getType());
}
};
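
For reference, the two folds above in plain C++: each character-class test
collapses to a single unsigned comparison, because the subtraction wraps
around for values below '0':

    // isdigit(c) -> (c - '0') <u 10 ; isascii(c) -> c <u 128
    static bool isDigitFolded(unsigned C) { return C - '0' < 10u; }
    static bool isAsciiFolded(unsigned C) { return C < 128u; }
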
@@ -917,8 +1041,7 @@ struct AbsOpt : public LibCallOptimization {
// abs(x) -> x >s -1 ? x : -x
Value *Op = CI->getArgOperand(0);
- Value *Pos = B.CreateICmpSGT(Op,
- Constant::getAllOnesValue(Op->getType()),
+ Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()),
"ispos");
Value *Neg = B.CreateNeg(Op, "neg");
return B.CreateSelect(Pos, Op, Neg);
@@ -969,11 +1092,15 @@ struct PrintFOpt : public LibCallOptimization {
return CI->use_empty() ? (Value*)CI :
ConstantInt::get(CI->getType(), 0);
- // printf("x") -> putchar('x'), even for '%'. Return the result of putchar
- // in case there is an error writing to stdout.
+ // Do not do any of the following transformations if the printf return value
+ // is used; in general the printf return value is not compatible with either
+ // putchar() or puts().
+ if (!CI->use_empty())
+ return 0;
+
+ // printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context),
- FormatStr[0]), B, TD);
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD);
if (CI->use_empty()) return CI;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1004,8 +1131,7 @@ struct PrintFOpt : public LibCallOptimization {
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
- CI->getArgOperand(1)->getType()->isPointerTy() &&
- CI->use_empty()) {
+ CI->getArgOperand(1)->getType()->isPointerTy()) {
EmitPutS(CI->getArgOperand(1), B, TD);
return CI;
}
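
The new use_empty() guard above exists because these rewrites change the
call's result, not its output: printf returns a character count, while
putchar returns the character written. A small host-side demonstration:

    #include <cstdio>

    int returnValuesDiffer() {
      int A = std::printf("x");  // 1: number of characters written.
      int B = std::putchar('x'); // 120 ('x'): the character, not a count.
      return A == B;             // 0 -- the fold is only safe when unused.
    }
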
@@ -1042,9 +1168,9 @@ struct SPrintFOpt : public LibCallOptimization {
if (!TD) return 0;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the
- ConstantInt::get(TD->getIntPtrType(*Context), // nul byte.
- FormatStr.size() + 1), 1, false, B, TD);
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
+ FormatStr.size() + 1), 1); // nul byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1058,13 +1184,11 @@ struct SPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 'c') {
// sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- Value *V = B.CreateTrunc(CI->getArgOperand(2),
- Type::getInt8Ty(*Context), "char");
+ Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
B.CreateStore(V, Ptr);
- Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
- "nul");
- B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr);
+ Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+ B.CreateStore(B.getInt8(0), Ptr);
return ConstantInt::get(CI->getType(), 1);
}
@@ -1080,8 +1204,7 @@ struct SPrintFOpt : public LibCallOptimization {
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2),
- IncLen, 1, false, B, TD);
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
@@ -1208,6 +1331,34 @@ struct FPrintFOpt : public LibCallOptimization {
}
};
+//===---------------------------------------===//
+// 'puts' Optimizations
+
+struct PutsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ // Check for a constant string.
+ std::string Str;
+ if (!GetConstantStringInfo(CI->getArgOperand(0), Str))
+ return 0;
+
+ if (Str.empty() && CI->use_empty()) {
+ // puts("") -> putchar('\n')
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, TD);
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ return 0;
+ }
+};
+
} // end anonymous namespace.
//===----------------------------------------------------------------------===//
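
The PutsOpt handler above covers only the degenerate case; observably,
puts("") and putchar('\n') each emit exactly one newline:

    #include <cstdio>

    void emptyPutsEquivalence() {
      std::puts("");      // Writes just "\n".
      std::putchar('\n'); // Same output (the return values differ, hence
                          // the use_empty() check in the handler).
    }
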
@@ -1220,10 +1371,10 @@ namespace {
class SimplifyLibCalls : public FunctionPass {
StringMap<LibCallOptimization*> Optimizations;
// String and Memory LibCall Optimizations
- StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp;
- StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
- StrNCpyOpt StrNCpy; StrLenOpt StrLen;
- StrToOpt StrTo; StrStrOpt StrStr;
+ StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
+ StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
+ StrNCpyOpt StrNCpy; StrLenOpt StrLen; StrPBrkOpt StrPBrk;
+ StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
// Math Library Optimizations
PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
@@ -1233,11 +1384,14 @@ namespace {
// Formatting and IO Optimizations
SPrintFOpt SPrintF; PrintFOpt PrintF;
FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
+ PutsOpt Puts;
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
- SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {}
+ SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {
+ initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
+ }
void InitOptimizations();
bool runOnFunction(Function &F);
@@ -1255,7 +1409,7 @@ namespace {
} // end anonymous namespace.
INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls",
- "Simplify well-known library calls", false, false);
+ "Simplify well-known library calls", false, false)
// Public interface to the Simplify LibCalls pass.
FunctionPass *llvm::createSimplifyLibCallsPass() {
@@ -1269,11 +1423,13 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["strcat"] = &StrCat;
Optimizations["strncat"] = &StrNCat;
Optimizations["strchr"] = &StrChr;
+ Optimizations["strrchr"] = &StrRChr;
Optimizations["strcmp"] = &StrCmp;
Optimizations["strncmp"] = &StrNCmp;
Optimizations["strcpy"] = &StrCpy;
Optimizations["strncpy"] = &StrNCpy;
Optimizations["strlen"] = &StrLen;
+ Optimizations["strpbrk"] = &StrPBrk;
Optimizations["strtol"] = &StrTo;
Optimizations["strtod"] = &StrTo;
Optimizations["strtof"] = &StrTo;
@@ -1281,6 +1437,8 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["strtoll"] = &StrTo;
Optimizations["strtold"] = &StrTo;
Optimizations["strtoull"] = &StrTo;
+ Optimizations["strspn"] = &StrSpn;
+ Optimizations["strcspn"] = &StrCSpn;
Optimizations["strstr"] = &StrStr;
Optimizations["memcmp"] = &MemCmp;
Optimizations["memcpy"] = &MemCpy;
@@ -1341,6 +1499,7 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["fwrite"] = &FWrite;
Optimizations["fputs"] = &FPuts;
Optimizations["fprintf"] = &FPrintF;
+ Optimizations["puts"] = &Puts;
}
@@ -2155,9 +2314,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// * pow(sqrt(x),y) -> pow(x,y*0.5)
// * pow(pow(x,y),z)-> pow(x,y*z)
//
-// puts:
-// * puts("") -> putchar('\n')
-//
// round, roundf, roundl:
// * round(cnst) -> cnst'
//
@@ -2173,24 +2329,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// stpcpy:
// * stpcpy(str, "literal") ->
// llvm.memcpy(str,"literal",strlen("literal")+1,1)
-// strrchr:
-// * strrchr(s,c) -> reverse_offset_of_in(c,s)
-// (if c is a constant integer and s is a constant string)
-// * strrchr(s1,0) -> strchr(s1,0)
-//
-// strpbrk:
-// * strpbrk(s,a) -> offset_in_for(s,a)
-// (if s and a are both constant strings)
-// * strpbrk(s,"") -> 0
-// * strpbrk(s,a) -> strchr(s,a[0]) (if a is constant string of length 1)
-//
-// strspn, strcspn:
-// * strspn(s,a) -> const_int (if both args are constant)
-// * strspn("",a) -> 0
-// * strspn(s,"") -> 0
-// * strcspn(s,a) -> const_int (if both args are constant)
-// * strcspn("",a) -> 0
-// * strcspn(s,"") -> strlen(a)
//
// tan, tanf, tanl:
// * tan(atan(x)) -> x
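
Worth noticing how the TODO list shrinks in the hunks above: strrchr, strpbrk,
strspn/strcspn, and puts all graduate from comments into registered handlers.
Dispatch is a simple name-keyed table; a simplified sketch of the lookup (the
pass's runOnFunction is outside this excerpt, and findHandler/LibCallOpt are
hypothetical names):

    #include "llvm/ADT/StringMap.h"
    #include "llvm/ADT/StringRef.h"
    using namespace llvm;

    struct LibCallOpt { /* CallOptimizer(...) lives here in the pass */ };

    // Null means "no handler registered; leave the call alone".
    static LibCallOpt *findHandler(StringMap<LibCallOpt*> &Optimizations,
                                   StringRef CalleeName) {
      StringMap<LibCallOpt*>::iterator I = Optimizations.find(CalleeName);
      return I == Optimizations.end() ? 0 : I->second;
    }
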
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
index 95d3ded..705f442 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -35,7 +35,9 @@ namespace {
public:
static char ID; // Pass identification
- Sinking() : FunctionPass(ID) {}
+ Sinking() : FunctionPass(ID) {
+ initializeSinkingPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -56,7 +58,11 @@ namespace {
} // end anonymous namespace
char Sinking::ID = 0;
-INITIALIZE_PASS(Sinking, "sink", "Code sinking", false, false);
+INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
@@ -150,11 +156,10 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
if (L->isVolatile()) return false;
- Value *Ptr = L->getPointerOperand();
- unsigned Size = AA->getTypeStoreSize(L->getType());
+ AliasAnalysis::Location Loc = AA->getLocation(L);
for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
E = Stores.end(); I != E; ++I)
- if (AA->getModRefInfo(*I, Ptr, Size) & AliasAnalysis::Mod)
+ if (AA->getModRefInfo(*I, Loc) & AliasAnalysis::Mod)
return false;
}
@@ -163,7 +168,10 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
return false;
}
- return Inst->isSafeToSpeculativelyExecute();
+ if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst))
+ return false;
+
+ return true;
}
/// SinkInstruction - Determine whether it is safe to sink the specified machine
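
Two changes land in isSafeToMove above: the raw (pointer, size) query becomes
an AliasAnalysis::Location, and the speculation check is relaxed to an
explicit terminator/PHI rejection, leaving the memory checks to handle loads
and stores. A condensed sketch of the load path, assuming this tree's AA API
(loadCanMovePast is a hypothetical name):

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // A non-volatile load may sink past Store only if the store cannot
    // modify the load's memory location.
    static bool loadCanMovePast(LoadInst *L, Instruction *Store,
                                AliasAnalysis *AA) {
      if (L->isVolatile())
        return false;
      AliasAnalysis::Location Loc = AA->getLocation(L);
      return (AA->getModRefInfo(Store, Loc) & AliasAnalysis::Mod) == 0;
    }
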
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp
index 2e437ac..9dd83c0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp
@@ -26,14 +26,14 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Type.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <map>
using namespace llvm;
@@ -49,7 +49,9 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- TailDup() : FunctionPass(ID) {}
+ TailDup() : FunctionPass(ID) {
+ initializeTailDupPass(*PassRegistry::getPassRegistry());
+ }
private:
inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
@@ -59,7 +61,7 @@ namespace {
}
char TailDup::ID = 0;
-INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false);
+INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false)
// Public interface to the Tail Duplication pass
FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
@@ -360,8 +362,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
Instruction *Inst = BI++;
if (isInstructionTriviallyDead(Inst))
Inst->eraseFromParent();
- else if (Constant *C = ConstantFoldInstruction(Inst)) {
- Inst->replaceAllUsesWith(C);
+ else if (Value *V = SimplifyInstruction(Inst)) {
+ Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
}
}
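
The ConstantFoldInstruction -> SimplifyInstruction swap above widens the
cleanup: SimplifyInstruction may return any existing Value (e.g. add i32 %x, 0
simplifies to %x), not only a Constant. A sketch of the fold step, leaving
SimplifyInstruction's optional TargetData/DominatorTree arguments at their
defaults (foldOrErase is a hypothetical name):

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/Instruction.h"
    using namespace llvm;

    static bool foldOrErase(Instruction *Inst) {
      if (Value *V = SimplifyInstruction(Inst)) { // e.g. add i32 %x, 0 -> %x
        Inst->replaceAllUsesWith(V);
        Inst->eraseFromParent();
        return true;
      }
      return false;
    }
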
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 3717254..5b6bc04 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -52,31 +52,52 @@
#define DEBUG_TYPE "tailcallelim"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
using namespace llvm;
STATISTIC(NumEliminated, "Number of tail calls removed");
+STATISTIC(NumRetDuped, "Number of returns duplicated");
STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
struct TailCallElim : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- TailCallElim() : FunctionPass(ID) {}
+ TailCallElim() : FunctionPass(ID) {
+ initializeTailCallElimPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
private:
+ CallInst *FindTRECandidate(Instruction *I,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool FoldReturnAndProcessPred(BasicBlock *BB,
+ ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
SmallVector<PHINode*, 8> &ArgumentPHIs,
@@ -88,7 +109,7 @@ namespace {
char TailCallElim::ID = 0;
INITIALIZE_PASS(TailCallElim, "tailcallelim",
- "Tail Call Elimination", false, false);
+ "Tail Call Elimination", false, false)
// Public interface to the TailCallElimination pass
FunctionPass *llvm::createTailCallEliminationPass() {
@@ -133,7 +154,6 @@ bool TailCallElim::runOnFunction(Function &F) {
bool TailCallsAreMarkedTail = false;
SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
-
bool FunctionContainsEscapingAllocas = false;
// CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
@@ -160,10 +180,17 @@ bool TailCallElim::runOnFunction(Function &F) {
return false;
// Second pass, change any tail calls to loops.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator()))
- MadeChange |= ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs,CannotTCETailMarkedCall);
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+ Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
+ TailCallsAreMarkedTail, ArgumentPHIs,
+ CannotTCETailMarkedCall);
+ MadeChange |= Change;
+ }
+ }
// If we eliminated any tail recursions, it's possible that we inserted some
// silly PHI nodes which just merge an initial value (the incoming operand)
@@ -175,7 +202,7 @@ bool TailCallElim::runOnFunction(Function &F) {
PHINode *PN = ArgumentPHIs[i];
// If the PHI Node is a dynamic constant, replace it with the value it is.
- if (Value *PNV = PN->hasConstantValue()) {
+ if (Value *PNV = SimplifyInstruction(PN)) {
PN->replaceAllUsesWith(PNV);
PN->eraseFromParent();
}
@@ -322,41 +349,47 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
return getCommonReturnValue(cast<ReturnInst>(I->use_back()), CI);
}
-bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVector<PHINode*, 8> &ArgumentPHIs,
- bool CannotTailCallElimCallsMarkedTail) {
- BasicBlock *BB = Ret->getParent();
+static Instruction *FirstNonDbg(BasicBlock::iterator I) {
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ return &*I;
+}
+
+CallInst*
+TailCallElim::FindTRECandidate(Instruction *TI,
+ bool CannotTailCallElimCallsMarkedTail) {
+ BasicBlock *BB = TI->getParent();
Function *F = BB->getParent();
- if (&BB->front() == Ret) // Make sure there is something before the ret...
- return false;
+ if (&BB->front() == TI) // Make sure there is something before the terminator.
+ return 0;
// Scan backwards from the return, checking to see if there is a tail call in
// this block. If so, set CI to it.
- CallInst *CI;
- BasicBlock::iterator BBI = Ret;
- while (1) {
+ CallInst *CI = 0;
+ BasicBlock::iterator BBI = TI;
+ while (true) {
CI = dyn_cast<CallInst>(BBI);
if (CI && CI->getCalledFunction() == F)
break;
if (BBI == BB->begin())
- return false; // Didn't find a potential tail call.
+ return 0; // Didn't find a potential tail call.
--BBI;
}
// If this call is marked as a tail call, and if there are dynamic allocas in
// the function, we cannot perform this optimization.
if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
- return false;
+ return 0;
// As a special case, detect code like this:
// double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
// and disable this xform in this case, because the code generator will
// lower the call to fabs into inline code.
if (BB == &F->getEntryBlock() &&
- &BB->front() == CI && &*++BB->begin() == Ret &&
+ FirstNonDbg(BB->front()) == CI &&
+ FirstNonDbg(llvm::next(BB->begin())) == TI &&
callIsSmall(F)) {
// A single-block function with just a call and a return. Check that
// the arguments match.
@@ -367,9 +400,17 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
for (; I != E && FI != FE; ++I, ++FI)
if (*I != &*FI) break;
if (I == E && FI == FE)
- return false;
+ return 0;
}
+ return CI;
+}
+
+bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
// If we are introducing accumulator recursion to eliminate operations after
// the call instruction that are both associative and commutative, the initial
// value for the accumulator is placed in this variable. If this value is set
@@ -387,7 +428,8 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
// tail call if all of the instructions between the call and the return are
// movable to above the call itself, leaving the call next to the return.
// Check that this is the case now.
- for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) {
+ BasicBlock::iterator BBI = CI;
+ for (++BBI; &*BBI != Ret; ++BBI) {
if (CanMoveAboveCall(BBI, CI)) continue;
// If we can't move the instruction above the call, it might be because it
@@ -424,6 +466,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
return false;
}
+ BasicBlock *BB = Ret->getParent();
+ Function *F = BB->getParent();
+
// OK! We can transform this tail call. If this is the first one found,
// create the new entry block, allowing us to branch back to the old entry.
if (OldEntry == 0) {
@@ -533,3 +578,53 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
++NumEliminated;
return true;
}
+
+bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
+ ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ bool Change = false;
+
+ // If the return block contains nothing but the return and PHI's,
+ // there might be an opportunity to duplicate the return in its
+ // predecessors and perform TRC there. Look for predecessors that end
+ // in unconditional branch and recursive call(s).
+ SmallVector<BranchInst*, 8> UncondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ TerminatorInst *PTI = Pred->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(BI);
+ }
+
+ while (!UncondBranchPreds.empty()) {
+ BranchInst *BI = UncondBranchPreds.pop_back_val();
+ BasicBlock *Pred = BI->getParent();
+ if (CallInst *CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail)){
+ DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
+ EliminateRecursiveTailCall(CI, FoldReturnIntoUncondBranch(Ret, BB, Pred),
+ OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
+ CannotTailCallElimCallsMarkedTail);
+ ++NumRetDuped;
+ Change = true;
+ }
+ }
+
+ return Change;
+}
+
+bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
+ if (!CI)
+ return false;
+
+ return EliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs,
+ CannotTailCallElimCallsMarkedTail);
+}
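
The shape FoldReturnAndProcessPred unlocks, as hypothetical C++ source: the
recursive call's block ends in an unconditional branch to a block holding only
a PHI and a ret, so the call is not "followed by a return" until that return
is duplicated into the predecessor, after which ordinary TRE turns it into a
loop:

    // Both arms branch to one shared return block (a PHI plus a ret in IR).
    static int countDown(int N) {
      int R;
      if (N == 0)
        R = 0;
      else
        R = countDown(N - 1); // tail-recursive once the ret is duplicated
      return R;
    }
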
diff --git a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
index 4d64c85..be7bed1 100644
--- a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CallSite.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -379,27 +380,10 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
/// return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
const TargetLowering &TLI) {
- std::vector<InlineAsm::ConstraintInfo>
- Constraints = IA->ParseConstraints();
-
- unsigned ArgNo = 0; // The argument of the CallInst.
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);
-
- // Compute the value type for each operand.
- switch (OpInfo.Type) {
- case InlineAsm::isOutput:
- if (OpInfo.isIndirect)
- OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
- break;
- case InlineAsm::isInput:
- OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
- break;
- case InlineAsm::isClobber:
- // Nothing to do.
- break;
- }
-
+ TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, SDValue());
@@ -584,7 +568,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
MemoryInst, Result);
Matcher.IgnoreProfitability = true;
bool Success = Matcher.MatchAddr(Address, 0);
- Success = Success; assert(Success && "Couldn't select *anything*?");
+ (void)Success; assert(Success && "Couldn't select *anything*?");
// If the match didn't cover I, then it won't be shared by it.
if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 093083a..acaea19 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -19,8 +19,9 @@
#include "llvm/Constant.h"
#include "llvm/Type.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Scalar.h"
@@ -63,12 +64,27 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
/// any single-entry PHI nodes in it, fold them away. This handles the case
/// when all entries to the PHI nodes in a block are guaranteed equal, such as
/// when the block has exactly one predecessor.
-void llvm::FoldSingleEntryPHINodes(BasicBlock *BB) {
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
+ if (!isa<PHINode>(BB->begin())) return;
+
+ AliasAnalysis *AA = 0;
+ MemoryDependenceAnalysis *MemDep = 0;
+ if (P) {
+ AA = P->getAnalysisIfAvailable<AliasAnalysis>();
+ MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
+ }
+
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
if (PN->getIncomingValue(0) != PN)
PN->replaceAllUsesWith(PN->getIncomingValue(0));
else
PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+
+ if (MemDep)
+ MemDep->removeInstruction(PN); // Memdep updates AA itself.
+ else if (AA && isa<PointerType>(PN->getType()))
+ AA->deleteValue(PN);
+
PN->eraseFromParent();
}
}
@@ -110,7 +126,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
if (isa<InvokeInst>(PredBB->getTerminator())) return false;
succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
- BasicBlock* OnlySucc = BB;
+ BasicBlock *OnlySucc = BB;
for (; SI != SE; ++SI)
if (*SI != OnlySucc) {
OnlySucc = 0; // There are multiple distinct successors!
@@ -131,10 +147,8 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
}
// Begin by getting rid of unneeded PHIs.
- while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
- PN->replaceAllUsesWith(PN->getIncomingValue(0));
- BB->getInstList().pop_front(); // Delete the phi node...
- }
+ if (isa<PHINode>(BB->front()))
+ FoldSingleEntryPHINodes(BB, P);
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
@@ -152,24 +166,27 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
// Finally, erase the old block and update dominator info.
if (P) {
- if (DominatorTree* DT = P->getAnalysisIfAvailable<DominatorTree>()) {
- DomTreeNode* DTN = DT->getNode(BB);
- DomTreeNode* PredDTN = DT->getNode(PredBB);
-
- if (DTN) {
- SmallPtrSet<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
- for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = Children.begin(),
+ if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ if (DomTreeNode *DTN = DT->getNode(BB)) {
+ DomTreeNode *PredDTN = DT->getNode(PredBB);
+ SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
+ for (SmallVector<DomTreeNode*, 8>::iterator DI = Children.begin(),
DE = Children.end(); DI != DE; ++DI)
DT->changeImmediateDominator(*DI, PredDTN);
DT->eraseNode(BB);
}
+
+ if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+ LI->removeBlock(BB);
+
+ if (MemoryDependenceAnalysis *MD =
+ P->getAnalysisIfAvailable<MemoryDependenceAnalysis>())
+ MD->invalidateCachedPredecessors();
}
}
BB->eraseFromParent();
-
-
return true;
}
@@ -218,52 +235,6 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
}
-/// RemoveSuccessor - Change the specified terminator instruction such that its
-/// successor SuccNum no longer exists. Because this reduces the outgoing
-/// degree of the current basic block, the actual terminator instruction itself
-/// may have to be changed. In the case where the last successor of the block
-/// is deleted, a return instruction is inserted in its place which can cause a
-/// surprising change in program behavior if it is not expected.
-///
-void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
- assert(SuccNum < TI->getNumSuccessors() &&
- "Trying to remove a nonexistant successor!");
-
- // If our old successor block contains any PHI nodes, remove the entry in the
- // PHI nodes that comes from this branch...
- //
- BasicBlock *BB = TI->getParent();
- TI->getSuccessor(SuccNum)->removePredecessor(BB);
-
- TerminatorInst *NewTI = 0;
- switch (TI->getOpcode()) {
- case Instruction::Br:
- // If this is a conditional branch... convert to unconditional branch.
- if (TI->getNumSuccessors() == 2) {
- cast<BranchInst>(TI)->setUnconditionalDest(TI->getSuccessor(1-SuccNum));
- } else { // Otherwise convert to a return instruction...
- Value *RetVal = 0;
-
- // Create a value to return... if the function doesn't return null...
- if (!BB->getParent()->getReturnType()->isVoidTy())
- RetVal = Constant::getNullValue(BB->getParent()->getReturnType());
-
- // Create the return...
- NewTI = ReturnInst::Create(TI->getContext(), RetVal);
- }
- break;
-
- case Instruction::Invoke: // Should convert to call
- case Instruction::Switch: // Should remove entry
- default:
- case Instruction::Ret: // Cannot happen, has no successors!
- llvm_unreachable("Unhandled terminator inst type in RemoveSuccessor!");
- }
-
- if (NewTI) // If it's a different instruction, replace.
- ReplaceInstWithInst(TI, NewTI);
-}
-
/// GetSuccessorNumber - Search for the specified successor of basic block BB
/// and return its position in the terminator instruction's list of
/// successors. It is an error to call this with a block that is not a
@@ -300,13 +271,13 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
assert(SP == BB && "CFG broken");
SP = NULL;
return SplitBlock(Succ, Succ->begin(), P);
- } else {
- // Otherwise, if BB has a single successor, split it at the bottom of the
- // block.
- assert(BB->getTerminator()->getNumSuccessors() == 1 &&
- "Should have a single succ!");
- return SplitBlock(BB, BB->getTerminator(), P);
}
+
+ // Otherwise, if BB has a single successor, split it at the bottom of the
+ // block.
+ assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+ "Should have a single succ!");
+ return SplitBlock(BB, BB->getTerminator(), P);
}
/// SplitBlock - Split the specified block at the specified instruction - every
@@ -322,12 +293,12 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
// The new block lives in whichever loop the old one did. This preserves
// LCSSA as well, because we force the split point to be after any PHI nodes.
- if (LoopInfo* LI = P->getAnalysisIfAvailable<LoopInfo>())
+ if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
if (Loop *L = LI->getLoopFor(Old))
L->addBasicBlockToLoop(New, LI->getBase());
if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
- // Old dominates New. New node domiantes all other nodes dominated by Old.
+ // Old dominates New. New node dominates all other nodes dominated by Old.
DomTreeNode *OldNode = DT->getNode(Old);
std::vector<DomTreeNode *> Children;
for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
@@ -340,9 +311,6 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
DT->changeImmediateDominator(*I, NewNode);
}
- if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>())
- DF->splitBlock(Old);
-
return New;
}
@@ -354,10 +322,9 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
/// suffix of 'Suffix'.
///
/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
-/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses.
-/// In particular, it does not preserve LoopSimplify (because it's
-/// complicated to handle the case where one of the edges being split
-/// is an exit of a loop with other exits).
+/// LoopInfo, and LCSSA but no other analyses. In particular, it does not
+/// preserve LoopSimplify (because it's complicated to handle the case where one
+/// of the edges being split is an exit of a loop with other exits).
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
BasicBlock *const *Preds,
@@ -407,13 +374,10 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
}
}
- // Update dominator tree and dominator frontier if available.
+ // Update dominator tree if available.
DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
if (DT)
DT->splitBlock(NewBB);
- if (DominanceFrontier *DF =
- P ? P->getAnalysisIfAvailable<DominanceFrontier>() : 0)
- DF->splitBlock(NewBB);
// Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
// node becomes an incoming value for BB's phi node. However, if the Preds
@@ -545,7 +509,32 @@ void llvm::FindFunctionBackedges(const Function &F,
// Go up one level.
InStack.erase(VisitStack.pop_back_val().first);
}
- } while (!VisitStack.empty());
-
-
+ } while (!VisitStack.empty());
+}
+
+/// FoldReturnIntoUncondBranch - This method duplicates the specified return
+/// instruction into a predecessor which ends in an unconditional branch. If
+/// the return instruction returns a value defined by a PHI, propagate the
+/// right value into the return. It returns the new return instruction in the
+/// predecessor.
+ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
+ BasicBlock *Pred) {
+ Instruction *UncondBranch = Pred->getTerminator();
+ // Clone the return and add it to the end of the predecessor.
+ Instruction *NewRet = RI->clone();
+ Pred->getInstList().push_back(NewRet);
+
+ // If the return instruction returns a value, and if the value was a
+ // PHI node in "BB", propagate the right value into the return.
+ for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
+ i != e; ++i)
+ if (PHINode *PN = dyn_cast<PHINode>(*i))
+ if (PN->getParent() == BB)
+ *i = PN->getIncomingValueForBlock(Pred);
+
+ // Update any PHI nodes in the returning block to realize that we no
+ // longer branch to them.
+ BB->removePredecessor(Pred);
+ UncondBranch->eraseFromParent();
+ return cast<ReturnInst>(NewRet);
}
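
A usage sketch of the helper just added, with the rewrite it performs spelled
out in comments (duplicateReturnInto is a hypothetical wrapper; the real
caller is the TailRecursionElimination change above):

    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    using namespace llvm;

    // Before: pred: br label %exit
    //         exit: %p = phi i32 [ %v, %pred ], ... ; ret i32 %p
    // After:  pred: ret i32 %v   (and %pred no longer feeds %exit's PHIs)
    static ReturnInst *duplicateReturnInto(ReturnInst *Ret, BasicBlock *BB,
                                           BasicBlock *Pred) {
      return FoldReturnIntoUncondBranch(Ret, BB, Pred);
    }
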
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index f75ffe6..616b066 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -11,8 +11,7 @@
// inserting a dummy basic block. This pass may be "required" by passes that
// cannot deal with critical edges. For this usage, the structure type is
// forward declared. This pass obviously invalidates the CFG, but can update
-// forward dominator (set, immediate dominators, tree, and frontier)
-// information.
+// dominator trees.
//
//===----------------------------------------------------------------------===//
@@ -36,13 +35,14 @@ STATISTIC(NumBroken, "Number of blocks inserted");
namespace {
struct BreakCriticalEdges : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- BreakCriticalEdges() : FunctionPass(ID) {}
+ BreakCriticalEdges() : FunctionPass(ID) {
+ initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<ProfileInfo>();
@@ -54,7 +54,7 @@ namespace {
char BreakCriticalEdges::ID = 0;
INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
- "Break critical edges in CFG", false, false);
+ "Break critical edges in CFG", false, false)
// Publically exposed interface to pass...
char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
@@ -150,10 +150,9 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
}
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
-/// split the critical edge. This will update DominatorTree and
-/// DominatorFrontier information if it is available, thus calling this pass
-/// will not invalidate either of them. This returns the new block if the edge
-/// was split, null otherwise.
+/// split the critical edge. This will update DominatorTree information if it
+/// is available, so calling this pass will not invalidate it.
+/// This returns the new block if the edge was split, null otherwise.
///
/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
/// specified successor will be merged into the same critical edge block.
@@ -255,12 +254,11 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (P == 0) return NewBB;
DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
- DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>();
LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
// If we have nothing to update, just return.
- if (DT == 0 && DF == 0 && LI == 0 && PI == 0)
+ if (DT == 0 && LI == 0 && PI == 0)
return NewBB;
// Now update analysis information. Since the only predecessor of NewBB is
@@ -281,7 +279,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
I != E; ++I) {
BasicBlock *P = *I;
if (P != NewBB)
- OtherPreds.push_back(P);
+ OtherPreds.push_back(P);
}
}
@@ -318,40 +316,6 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
}
- // Should we update DominanceFrontier information?
- if (DF) {
- // If NewBBDominatesDestBB hasn't been computed yet, do so with DF.
- if (!OtherPreds.empty()) {
- // FIXME: IMPLEMENT THIS!
- llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset."
- " not implemented yet!");
- }
-
- // Since the new block is dominated by its only predecessor TIBB,
- // it cannot be in any block's dominance frontier. If NewBB dominates
- // DestBB, its dominance frontier is the same as DestBB's, otherwise it is
- // just {DestBB}.
- DominanceFrontier::DomSetType NewDFSet;
- if (NewBBDominatesDestBB) {
- DominanceFrontier::iterator I = DF->find(DestBB);
- if (I != DF->end()) {
- DF->addBasicBlock(NewBB, I->second);
-
- if (I->second.count(DestBB)) {
- // However NewBB's frontier does not include DestBB.
- DominanceFrontier::iterator NF = DF->find(NewBB);
- DF->removeFromFrontier(NF, DestBB);
- }
- }
- else
- DF->addBasicBlock(NewBB, DominanceFrontier::DomSetType());
- } else {
- DominanceFrontier::DomSetType NewDFSet;
- NewDFSet.insert(DestBB);
- DF->addBasicBlock(NewBB, NewDFSet);
- }
- }
-
// Update LoopInfo if it is around.
if (LI) {
if (Loop *TIL = LI->getLoopFor(TIBB)) {
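
Finally, with DominanceFrontier support stripped above, SplitCriticalEdge
still keeps DominatorTree, LoopInfo, and ProfileInfo consistent when the pass
can reach them. A minimal caller sketch, assuming this tree's signature
(splitIfCritical is a hypothetical name):

    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    using namespace llvm;

    // Returns the new block if successor SuccNum of TI was a critical edge
    // that got split, or null if there was nothing to do.
    static BasicBlock *splitIfCritical(TerminatorInst *TI, unsigned SuccNum,
                                       Pass *P) {
      return SplitCriticalEdge(TI, SuccNum, P);
    }
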
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index c313949..4a90751 100644
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -131,21 +131,6 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
return CI;
}
-
-/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
-/// expects that Len has type 'intptr_t' and Dst/Src are pointers.
-Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align,
- bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- Dst = CastToCStr(Dst, B);
- Src = CastToCStr(Src, B);
- const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() };
- Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3);
- return B.CreateCall5(MemCpy, Dst, Src, Len,
- ConstantInt::get(B.getInt32Ty(), Align),
- ConstantInt::get(B.getInt1Ty(), isVolatile));
-}
-
/// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
/// are pointers.
@@ -170,22 +155,6 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
return CI;
}
-/// EmitMemMove - Emit a call to the memmove function to the builder. This
-/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len, unsigned Align,
- bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- const Type *ArgTys[3] = { Dst->getType(), Src->getType(),
- TD->getIntPtrType(Context) };
- Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, ArgTys, 3);
- Dst = CastToCStr(Dst, B);
- Src = CastToCStr(Src, B);
- Value *A = ConstantInt::get(B.getInt32Ty(), Align);
- Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile);
- return B.CreateCall5(MemMove, Dst, Src, Len, A, Vol);
-}
-
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
@@ -233,18 +202,6 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
return CI;
}
-/// EmitMemSet - Emit a call to the memset function
-Value *llvm::EmitMemSet(Value *Dst, Value *Val, Value *Len, bool isVolatile,
- IRBuilder<> &B, const TargetData *TD) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- Intrinsic::ID IID = Intrinsic::memset;
- const Type *Tys[2] = { Dst->getType(), Len->getType() };
- Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 2);
- Value *Align = ConstantInt::get(B.getInt32Ty(), 1);
- Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile);
- return B.CreateCall5(MemSet, CastToCStr(Dst, B), Val, Len, Align, Vol);
-}
-
/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
/// 'floor'). This function is known to take a single of type matching 'Op' and
/// returns one value with the same type. If 'Op' is a long double, 'l' is
@@ -422,8 +379,8 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
return false;
if (isFoldable(3, 2, false)) {
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1, false, B, TD);
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
replaceCall(CI->getArgOperand(0));
return true;
}
@@ -445,8 +402,8 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
return false;
if (isFoldable(3, 2, false)) {
- EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1, false, B, TD);
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
replaceCall(CI->getArgOperand(0));
return true;
}
@@ -465,8 +422,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
if (isFoldable(3, 2, false)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
false);
- EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2),
- false, B, TD);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
replaceCall(CI->getArgOperand(0));
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index f43186e..d967ceb 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -112,8 +112,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
const BasicBlock &BB = *BI;
// Create a new basic block and copy instructions into it!
- BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc,
- CodeInfo);
+ BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
VMap[&BB] = CBB; // Add basic block mapping.
if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
@@ -122,12 +121,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Loop over all of the instructions in the function, fixing up operand
// references as we go. This uses VMap to do all the hard work.
- //
for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
BE = NewFunc->end(); BB != BE; ++BB)
// Loop over all instructions, fixing each one as we find it...
for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
- RemapInstruction(II, VMap, ModuleLevelChanges);
+ RemapInstruction(II, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
}
/// CloneFunction - Return a copy of the specified function, but without
@@ -138,8 +137,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
/// updated to include mappings from all of the instructions and basicblocks in
/// the function from their old to new values.
///
-Function *llvm::CloneFunction(const Function *F,
- ValueToValueMapTy &VMap,
+Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
ClonedCodeInfo *CodeInfo) {
std::vector<const Type*> ArgTypes;
@@ -216,7 +214,7 @@ namespace {
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
std::vector<const BasicBlock*> &ToClone){
- Value *&BBEntry = VMap[BB];
+ TrackingVH<Value> &BBEntry = VMap[BB];
// Have we already cloned this block?
if (BBEntry) return;
@@ -262,8 +260,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// If the condition was a known constant in the callee...
ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
// Or is a known constant in the caller...
- if (Cond == 0)
- Cond = dyn_cast_or_null<ConstantInt>(VMap[BI->getCondition()]);
+ if (Cond == 0) {
+ Value *V = VMap[BI->getCondition()];
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
// Constant fold to uncond branch!
if (Cond) {
@@ -276,8 +276,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
} else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
// If switching on a value known constant in the caller.
ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
- if (Cond == 0) // Or known constant after constant prop in the callee...
- Cond = dyn_cast_or_null<ConstantInt>(VMap[SI->getCondition()]);
+ if (Cond == 0) { // Or known constant after constant prop in the callee...
+ Value *V = VMap[SI->getCondition()];
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
if (Cond) { // Constant fold to uncond branch!
BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
VMap[OldTI] = BranchInst::Create(Dest, NewBB);
@@ -318,7 +320,8 @@ ConstantFoldMappedInstruction(const Instruction *I) {
SmallVector<Constant*, 8> Ops;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
- VMap, ModuleLevelChanges)))
+ VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges)))
Ops.push_back(Op);
else
return 0; // All operands not constant!
@@ -394,7 +397,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
SmallVector<const PHINode*, 16> PHIToResolve;
for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
BI != BE; ++BI) {
- BasicBlock *NewBB = cast_or_null<BasicBlock>(VMap[BI]);
+ Value *V = VMap[BI];
+ BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
if (NewBB == 0) continue; // Dead block.
// Add the new block to the new function.
@@ -455,7 +459,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
I->setDebugLoc(DebugLoc());
}
}
- RemapInstruction(I, VMap, ModuleLevelChanges);
+ RemapInstruction(I, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
}
}
@@ -474,10 +479,11 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
OPN = PHIToResolve[phino];
PHINode *PN = cast<PHINode>(VMap[OPN]);
for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
- if (BasicBlock *MappedBlock =
- cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) {
+ Value *V = VMap[PN->getIncomingBlock(pred)];
+ if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
Value *InVal = MapValue(PN->getIncomingValue(pred),
- VMap, ModuleLevelChanges);
+ VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
assert(InVal && "Unknown input value?");
PN->setIncomingValue(pred, InVal);
PN->setIncomingBlock(pred, MappedBlock);
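The CloneFunction.cpp hunks above thread the new RemapFlags enum through RemapInstruction and MapValue in place of the old ModuleLevelChanges boolean. The translation repeated at each call site can be captured in one line; the helper below is an invented name, purely for illustration:

    #include "llvm/Transforms/Utils/ValueMapper.h"
    using namespace llvm;

    // Hypothetical helper (not in the patch): RF_NoModuleLevelChanges tells
    // the mapper to leave module-scope entities such as globals untouched.
    static RemapFlags remapFlagsFor(bool ModuleLevelChanges) {
      return ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges;
    }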
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp b/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp
index 551b630..87dd141 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp
@@ -19,15 +19,14 @@
using namespace llvm;
-/// CloneDominatorInfo - Clone basicblock's dominator tree and, if available,
-/// dominance info. It is expected that basic block is already cloned.
+/// CloneDominatorInfo - Clone a basic block's dominator tree. It is expected
+/// that the basic block is already cloned.
static void CloneDominatorInfo(BasicBlock *BB,
- ValueMap<const Value *, Value *> &VMap,
- DominatorTree *DT,
- DominanceFrontier *DF) {
+ ValueToValueMapTy &VMap,
+ DominatorTree *DT) {
assert (DT && "DominatorTree is not available");
- ValueMap<const Value *, Value*>::iterator BI = VMap.find(BB);
+ ValueToValueMapTy::iterator BI = VMap.find(BB);
assert (BI != VMap.end() && "BasicBlock clone is missing");
BasicBlock *NewBB = cast<BasicBlock>(BI->second);
@@ -42,45 +41,23 @@ static void CloneDominatorInfo(BasicBlock *BB,
// NewBB's dominator is either BB's dominator or BB's dominator's clone.
BasicBlock *NewBBDom = BBDom;
- ValueMap<const Value *, Value*>::iterator BBDomI = VMap.find(BBDom);
+ ValueToValueMapTy::iterator BBDomI = VMap.find(BBDom);
if (BBDomI != VMap.end()) {
NewBBDom = cast<BasicBlock>(BBDomI->second);
if (!DT->getNode(NewBBDom))
- CloneDominatorInfo(BBDom, VMap, DT, DF);
+ CloneDominatorInfo(BBDom, VMap, DT);
}
DT->addNewBlock(NewBB, NewBBDom);
-
- // Copy cloned dominance frontiner set
- if (DF) {
- DominanceFrontier::DomSetType NewDFSet;
- DominanceFrontier::iterator DFI = DF->find(BB);
- if ( DFI != DF->end()) {
- DominanceFrontier::DomSetType S = DFI->second;
- for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end();
- I != E; ++I) {
- BasicBlock *DB = *I;
- ValueMap<const Value*, Value*>::iterator IDM = VMap.find(DB);
- if (IDM != VMap.end())
- NewDFSet.insert(cast<BasicBlock>(IDM->second));
- else
- NewDFSet.insert(DB);
- }
- }
- DF->addBasicBlock(NewBB, NewDFSet);
- }
}
/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
/// using old blocks to new blocks mapping.
Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
- ValueMap<const Value *, Value *> &VMap, Pass *P) {
+ ValueToValueMapTy &VMap, Pass *P) {
DominatorTree *DT = NULL;
- DominanceFrontier *DF = NULL;
- if (P) {
+ if (P)
DT = P->getAnalysisIfAvailable<DominatorTree>();
- DF = P->getAnalysisIfAvailable<DominanceFrontier>();
- }
SmallVector<BasicBlock *, 16> NewBlocks;
@@ -116,7 +93,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I) {
BasicBlock *BB = *I;
- CloneDominatorInfo(BB, VMap, DT, DF);
+ CloneDominatorInfo(BB, VMap, DT);
}
// Process sub loops
@@ -134,7 +111,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
for (unsigned index = 0, num_ops = Insn->getNumOperands();
index != num_ops; ++index) {
Value *Op = Insn->getOperand(index);
- ValueMap<const Value *, Value *>::iterator OpItr = VMap.find(Op);
+ ValueToValueMapTy::iterator OpItr = VMap.find(Op);
if (OpItr != VMap.end())
Insn->setOperand(index, OpItr->second);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
index b347bf5..1046c38 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -89,8 +89,7 @@ Module *llvm::CloneModule(const Module *M,
GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
if (I->hasInitializer())
GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
- VMap,
- true)));
+ VMap, RF_None)));
GV->setLinkage(I->getLinkage());
GV->setThreadLocal(I->isThreadLocal());
GV->setConstant(I->isConstant());
@@ -121,7 +120,7 @@ Module *llvm::CloneModule(const Module *M,
GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
GA->setLinkage(I->getLinkage());
if (const Constant* C = I->getAliasee())
- GA->setAliasee(cast<Constant>(MapValue(C, VMap, true)));
+ GA->setAliasee(cast<Constant>(MapValue(C, VMap, RF_None)));
}
// And named metadata....
@@ -130,7 +129,8 @@ Module *llvm::CloneModule(const Module *M,
const NamedMDNode &NMD = *I;
NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
- NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap, true)));
+ NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap,
+ RF_None)));
}
return New;
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index b51f751..e633772 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -186,8 +186,8 @@ void CodeExtractor::splitReturnBlocks() {
if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
if (DT) {
- // Old dominates New. New node domiantes all other nodes dominated
- //by Old.
+ // Old dominates New. New node dominates all other nodes dominated
+ // by Old.
DomTreeNode *OldNode = DT->getNode(*I);
SmallVector<DomTreeNode*, 8> Children;
for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index 8e82a02..8cc2649 100644
--- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -129,7 +129,7 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
assert(II->getParent() != P->getIncomingBlock(i) &&
- "Invoke edge not supported yet"); II=II;
+ "Invoke edge not supported yet"); (void)II;
}
new StoreInst(P->getIncomingValue(i), Slot,
P->getIncomingBlock(i)->getTerminator());
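The one-line DemoteRegToStack change swaps the old `II=II;` self-assignment for the conventional `(void)II;` cast, which silences unused-variable warnings in NDEBUG builds (where the assert referencing II compiles away) without tripping self-assignment diagnostics. A generic sketch of the pattern:

    #include <cassert>

    static void example(int *P) {
      int *Q = P;            // consumed only by the assert below
      assert(Q && "null!");  // compiled away when NDEBUG is defined
      (void)Q;               // keeps -Wunused-variable quiet in release builds
    }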
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 88979e86..c1faf24 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -22,7 +22,9 @@
#include "llvm/Attributes.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CallSite.h"
@@ -170,7 +172,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
/// some edges of the callgraph may remain.
static void UpdateCallGraphAfterInlining(CallSite CS,
Function::iterator FirstNewBlock,
- ValueMap<const Value*, Value*> &VMap,
+ ValueToValueMapTy &VMap,
InlineFunctionInfo &IFI) {
CallGraph &CG = *IFI.CG;
const Function *Caller = CS.getInstruction()->getParent()->getParent();
@@ -193,7 +195,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
for (; I != E; ++I) {
const Value *OrigCall = I->first;
- ValueMap<const Value*, Value*>::iterator VMI = VMap.find(OrigCall);
+ ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
// Only copy the edge if the call was inlined!
if (VMI == VMap.end() || VMI->second == 0)
continue;
@@ -228,6 +230,90 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
CallerNode->removeCallEdgeFor(CS);
}
+/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// we have to make the implicit memcpy explicit by adding it.
+static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
+ const Function *CalledFunc,
+ InlineFunctionInfo &IFI,
+ unsigned ByValAlignment) {
+ const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+
+ // If the called function is readonly, then it could not mutate the caller's
+ // copy of the byval'd memory. In this case, it is safe to elide the copy and
+ // temporary.
+ if (CalledFunc->onlyReadsMemory()) {
+ // If the byval argument has a specified alignment that is greater than the
+ // passed in pointer, then we either have to round up the input pointer or
+ // give up on this transformation.
+ if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
+ return Arg;
+
+ // If the pointer is already known to be sufficiently aligned, or if we can
+ // round it up to a larger alignment, then we don't need a temporary.
+ if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
+ IFI.TD) >= ByValAlignment)
+ return Arg;
+
+ // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
+ // for code quality, but rarely happens and is required for correctness.
+ }
+
+ LLVMContext &Context = Arg->getContext();
+
+ const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+
+ // Create the alloca. If we have TargetData, use nice alignment.
+ unsigned Align = 1;
+ if (IFI.TD)
+ Align = IFI.TD->getPrefTypeAlignment(AggTy);
+
+ // If the byval had an alignment specified, we *must* use at least that
+ // alignment, as it is required by the byval argument (and uses of the
+ // pointer inside the callee).
+ Align = std::max(Align, ByValAlignment);
+
+ Function *Caller = TheCall->getParent()->getParent();
+
+ Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
+ &*Caller->begin()->begin());
+ // Emit a memcpy.
+ const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
+ Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
+ Intrinsic::memcpy,
+ Tys, 3);
+ Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
+ Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
+
+ Value *Size;
+ if (IFI.TD == 0)
+ Size = ConstantExpr::getSizeOf(AggTy);
+ else
+ Size = ConstantInt::get(Type::getInt64Ty(Context),
+ IFI.TD->getTypeStoreSize(AggTy));
+
+ // Always generate a memcpy of alignment 1 here because we don't know
+ // the alignment of the src pointer. Other optimizations can infer
+ // better alignment.
+ Value *CallArgs[] = {
+ DestCast, SrcCast, Size,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ ConstantInt::getFalse(Context) // isVolatile
+ };
+ CallInst *TheMemCpy =
+ CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
+
+ // If we have a call graph, update it.
+ if (CallGraph *CG = IFI.CG) {
+ CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
+ CallGraphNode *CallerNode = (*CG)[Caller];
+ CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
+ }
+
+ // Uses of the argument in the function should use our new alloca
+ // instead.
+ return NewAlloca;
+}
+
// InlineFunction - This function inlines the called function into the basic
// block of the caller. This returns false if it is not possible to inline this
// call. The program is still in a well defined state if this occurs though.
@@ -251,7 +337,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
CalledFunc->isDeclaration() || // call, or call to a vararg function!
CalledFunc->getFunctionType()->isVarArg()) return false;
-
// If the call to the callee is not a tail call, we must clear the 'tail'
// flags on any calls that we inline.
bool MustClearTailCallFlags =
@@ -287,7 +372,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
Function::iterator FirstNewBlock;
{ // Scope to destroy VMap after cloning.
- ValueMap<const Value*, Value*> VMap;
+ ValueToValueMapTy VMap;
assert(CalledFunc->arg_size() == CS.arg_size() &&
"No varargs calls can be inlined!");
@@ -304,58 +389,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// by them explicit. However, we don't do this if the callee is readonly
// or readnone, because the copy would be unneeded: the callee doesn't
// modify the struct.
- if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
- !CalledFunc->onlyReadsMemory()) {
- const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
- const Type *VoidPtrTy =
- Type::getInt8PtrTy(Context);
-
- // Create the alloca. If we have TargetData, use nice alignment.
- unsigned Align = 1;
- if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy);
- Value *NewAlloca = new AllocaInst(AggTy, 0, Align,
- I->getName(),
- &*Caller->begin()->begin());
- // Emit a memcpy.
- const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
- Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
- Intrinsic::memcpy,
- Tys, 3);
- Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
- Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);
-
- Value *Size;
- if (IFI.TD == 0)
- Size = ConstantExpr::getSizeOf(AggTy);
- else
- Size = ConstantInt::get(Type::getInt64Ty(Context),
- IFI.TD->getTypeStoreSize(AggTy));
-
- // Always generate a memcpy of alignment 1 here because we don't know
- // the alignment of the src pointer. Other optimizations can infer
- // better alignment.
- Value *CallArgs[] = {
- DestCast, SrcCast, Size,
- ConstantInt::get(Type::getInt32Ty(Context), 1),
- ConstantInt::get(Type::getInt1Ty(Context), 0)
- };
- CallInst *TheMemCpy =
- CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
-
- // If we have a call graph, update it.
- if (CallGraph *CG = IFI.CG) {
- CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
- CallGraphNode *CallerNode = (*CG)[Caller];
- CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
- }
-
- // Uses of the argument in the function should use our new alloca
- // instead.
- ActualArg = NewAlloca;
-
+ if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
+ ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
+ CalledFunc->getParamAlignment(ArgNo+1));
+
// Calls that we inline may use the new alloca, so we need to clear
- // their 'tail' flags.
- MustClearTailCallFlags = true;
+ // their 'tail' flags if HandleByValArgument introduced a new alloca and
+ // the callee has calls.
+ MustClearTailCallFlags |= ActualArg != *AI;
}
VMap[I] = ActualArg;
@@ -399,8 +440,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
if (!isa<Constant>(AI->getArraySize()))
continue;
- // Keep track of the static allocas that we inline into the caller if the
- // StaticAllocas pointer is non-null.
+ // Keep track of the static allocas that we inline into the caller.
IFI.StaticAllocas.push_back(AI);
// Scan for the block of allocas that we can move over, and move them
@@ -579,10 +619,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// any users of the original call/invoke instruction.
const Type *RTy = CalledFunc->getReturnType();
+ PHINode *PHI = 0;
if (Returns.size() > 1) {
// The PHI node should go at the front of the new basic block to merge all
// possible incoming values.
- PHINode *PHI = 0;
if (!TheCall->use_empty()) {
PHI = PHINode::Create(RTy, TheCall->getName(),
AfterCallBB->begin());
@@ -600,14 +640,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
"Ret value not consistent in function!");
PHI->addIncoming(RI->getReturnValue(), RI->getParent());
}
-
- // Now that we inserted the PHI, check to see if it has a single value
- // (e.g. all the entries are the same or undef). If so, remove the PHI so
- // it doesn't block other optimizations.
- if (Value *V = PHI->hasConstantValue()) {
- PHI->replaceAllUsesWith(V);
- PHI->eraseFromParent();
- }
}
@@ -664,5 +696,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// Now we can remove the CalleeEntry block, which is now empty.
Caller->getBasicBlockList().erase(CalleeEntry);
+ // If we inserted a phi node, check to see if it has a single value (e.g. all
+ // the entries are the same or undef). If so, remove the PHI so it doesn't
+ // block other optimizations.
+ if (PHI)
+ if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
+ PHI->replaceAllUsesWith(V);
+ PHI->eraseFromParent();
+ }
+
return true;
}
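After these InlineFunction.cpp changes, the byval copy logic lives in HandleByValArgument (which now elides the copy entirely for readonly callees) and the PHI formed for multiple returns is cleaned up with SimplifyInstruction once inlining finishes. A minimal caller-side sketch, assuming this snapshot's InlineFunctionInfo constructor, where the call graph and TargetData may both be null:

    #include "llvm/Support/CallSite.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    // Sketch only: inline one call site without callgraph maintenance.
    static bool tryInline(CallSite CS, const TargetData *TD) {
      InlineFunctionInfo IFI(/*CG=*/0, TD);
      if (!InlineFunction(CS, IFI))
        return false;  // e.g. recursive, vararg, or declaration-only callee
      // IFI.StaticAllocas now lists the static allocas moved into the caller.
      return true;
    }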
diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index 5ca8299..45c15de 100644
--- a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -23,7 +23,9 @@ using namespace llvm;
namespace {
struct InstNamer : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- InstNamer() : FunctionPass(ID) {}
+ InstNamer() : FunctionPass(ID) {
+ initializeInstNamerPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &Info) const {
Info.setPreservesAll();
@@ -48,11 +50,10 @@ namespace {
};
char InstNamer::ID = 0;
- INITIALIZE_PASS(InstNamer, "instnamer",
- "Assign names to anonymous instructions", false, false);
}
-
+INITIALIZE_PASS(InstNamer, "instnamer",
+ "Assign names to anonymous instructions", false, false)
char &llvm::InstructionNamerID = InstNamer::ID;
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index 275b265..b2e5fa6 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -47,7 +47,9 @@ STATISTIC(NumLCSSA, "Number of live out of a loop variables");
namespace {
struct LCSSA : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LCSSA() : LoopPass(ID) {}
+ LCSSA() : LoopPass(ID) {
+ initializeLCSSAPass(*PassRegistry::getPassRegistry());
+ }
// Cached analysis information for the current function.
DominatorTree *DT;
@@ -65,10 +67,7 @@ namespace {
AU.setPreservesCFG();
AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreserved<ScalarEvolution>();
}
@@ -90,7 +89,10 @@ namespace {
}
char LCSSA::ID = 0;
-INITIALIZE_PASS(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false);
+INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
Pass *llvm::createLCSSAPass() { return new LCSSA(); }
char &llvm::LCSSAID = LCSSA::ID;
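The LCSSA hunks show the two-stage registration convention this patch rolls out across the Utils passes: the constructor calls the pass's initialize function against the global registry, and INITIALIZE_PASS_BEGIN / INITIALIZE_PASS_DEPENDENCY / INITIALIZE_PASS_END (no trailing semicolon, placed outside the anonymous namespace) spell out the dependencies. A skeleton of the idiom; MyPass and initializeMyPassPass are illustrative names, not part of this patch:

    // Skeleton of the registration idiom; the initializeMyPassPass declaration
    // would normally come from the generated InitializePasses.h.
    namespace {
      struct MyPass : public FunctionPass {
        static char ID;
        MyPass() : FunctionPass(ID) {
          initializeMyPassPass(*PassRegistry::getPassRegistry());
        }
        virtual bool runOnFunction(Function &F) { return false; }
      };
    }
    char MyPass::ID = 0;
    INITIALIZE_PASS_BEGIN(MyPass, "mypass", "My example pass", false, false)
    INITIALIZE_PASS_DEPENDENCY(DominatorTree)
    INITIALIZE_PASS_END(MyPass, "mypass", "My example pass", false, false)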
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index 52f0499..063c76e 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -22,9 +22,11 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
@@ -66,9 +68,9 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
assert(BI->getParent() && "Terminator not inserted in block!");
OldDest->removePredecessor(BI->getParent());
- // Set the unconditional destination, and change the insn to be an
- // unconditional branch.
- BI->setUnconditionalDest(Destination);
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Destination, BI);
+ BI->eraseFromParent();
return true;
}
@@ -81,8 +83,9 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
assert(BI->getParent() && "Terminator not inserted in block!");
Dest1->removePredecessor(BI->getParent());
- // Change a conditional branch to unconditional.
- BI->setUnconditionalDest(Dest1);
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest1, BI);
+ BI->eraseFromParent();
return true;
}
return false;
@@ -209,9 +212,6 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
// We don't want debug info removed by anything this general.
if (isa<DbgInfoIntrinsic>(I)) return false;
- // Likewise for memory use markers.
- if (isa<MemoryUseIntrinsic>(I)) return false;
-
if (!I->mayHaveSideEffects()) return true;
// Special case intrinsics that "may have side effects" but can be deleted
@@ -260,29 +260,45 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
return true;
}
+/// areAllUsesEqual - Check whether the uses of a value are all the same.
+/// This is similar to Instruction::hasOneUse() except this will also return
+/// true when there are multiple uses that all refer to the same value.
+static bool areAllUsesEqual(Instruction *I) {
+ Value::use_iterator UI = I->use_begin();
+ Value::use_iterator UE = I->use_end();
+ if (UI == UE)
+ return false;
+
+ User *TheUse = *UI;
+ for (++UI; UI != UE; ++UI) {
+ if (*UI != TheUse)
+ return false;
+ }
+ return true;
+}
+
/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
/// dead PHI node, due to being a def-use chain of single-use nodes that
/// either forms a cycle or is terminated by a trivially dead instruction,
/// delete it. If that makes any of its operands trivially dead, delete them
/// too, recursively. Return true if the PHI node is actually deleted.
-bool
-llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
// We can remove a PHI if it is on a cycle in the def-use graph
// where each node in the cycle has degree one, i.e. only one use,
// and is an instruction with no side effects.
- if (!PN->hasOneUse())
+ if (!areAllUsesEqual(PN))
return false;
bool Changed = false;
SmallPtrSet<PHINode *, 4> PHIs;
PHIs.insert(PN);
for (Instruction *J = cast<Instruction>(*PN->use_begin());
- J->hasOneUse() && !J->mayHaveSideEffects();
+ areAllUsesEqual(J) && !J->mayHaveSideEffects();
J = cast<Instruction>(*J->use_begin()))
// If we find a PHI more than once, we're on a cycle that
// won't prove fruitful.
if (PHINode *JP = dyn_cast<PHINode>(J))
- if (!PHIs.insert(cast<PHINode>(JP))) {
+ if (!PHIs.insert(JP)) {
// Break the cycle and delete the PHI and its operands.
JP->replaceAllUsesWith(UndefValue::get(JP->getType()));
(void)RecursivelyDeleteTriviallyDeadInstructions(JP);
@@ -346,13 +362,13 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
WeakVH PhiIt = &BB->front();
while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
-
- Value *PNV = PN->hasConstantValue();
+
+ Value *PNV = SimplifyInstruction(PN, TD);
if (PNV == 0) continue;
-
+
// If we're able to simplify the phi to a single value, substitute the new
// value into all of its uses.
- assert(PNV != PN && "hasConstantValue broken");
+ assert(PNV != PN && "SimplifyInstruction broken!");
Value *OldPhiIt = PhiIt;
ReplaceAndSimplifyAllUses(PN, PNV, TD);
@@ -402,6 +418,12 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
PredBB->replaceAllUsesWith(DestBB);
if (P) {
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ if (DT) {
+ BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
+ DT->changeImmediateDominator(DestBB, PredBBIDom);
+ DT->eraseNode(PredBB);
+ }
ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
if (PI) {
PI->replaceAllUses(PredBB, DestBB);
@@ -645,3 +667,95 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
return Changed;
}
+
+/// enforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
+///
+static unsigned enforceKnownAlignment(Value *V, unsigned Align,
+ unsigned PrefAlign) {
+
+ User *U = dyn_cast<User>(V);
+ if (!U) return Align;
+
+ switch (Operator::getOpcode(U)) {
+ default: break;
+ case Instruction::BitCast:
+ return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+ case Instruction::GetElementPtr: {
+ // If all indexes are zero, it is just the alignment of the base pointer.
+ bool AllZeroOperands = true;
+ for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
+ if (!isa<Constant>(*i) ||
+ !cast<Constant>(*i)->isNullValue()) {
+ AllZeroOperands = false;
+ break;
+ }
+
+ if (AllZeroOperands) {
+ // Treat this like a bitcast.
+ return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+ }
+ return Align;
+ }
+ case Instruction::Alloca: {
+ AllocaInst *AI = cast<AllocaInst>(V);
+ // If there is a requested alignment and if this is an alloca, round up.
+ if (AI->getAlignment() >= PrefAlign)
+ return AI->getAlignment();
+ AI->setAlignment(PrefAlign);
+ return PrefAlign;
+ }
+ }
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // If there is a large requested alignment and we can, bump up the alignment
+ // of the global.
+ if (GV->isDeclaration()) return Align;
+
+ if (GV->getAlignment() >= PrefAlign)
+ return GV->getAlignment();
+ // We can only increase the alignment of the global if it has no alignment
+ // specified or if it is not assigned a section. If it is assigned a
+ // section, the global could be densely packed with other objects in the
+ // section, increasing the alignment could cause padding issues.
+ if (!GV->hasSection() || GV->getAlignment() == 0)
+ GV->setAlignment(PrefAlign);
+ return GV->getAlignment();
+ }
+
+ return Align;
+}
+
+/// getOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
+ const TargetData *TD) {
+ assert(V->getType()->isPointerTy() &&
+ "getOrEnforceKnownAlignment expects a pointer!");
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+ unsigned TrailZ = KnownZero.countTrailingOnes();
+
+ // Avoid trouble with ridiculously large TrailZ values, such as
+ // those computed from a null pointer.
+ TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+
+ unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+
+ // LLVM doesn't support alignments larger than this currently.
+ Align = std::min(Align, +Value::MaximumAlignment);
+
+ if (PrefAlign > Align)
+ Align = enforceKnownAlignment(V, Align, PrefAlign);
+
+ // We don't need to make any adjustment.
+ return Align;
+}
+
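The new getOrEnforceKnownAlignment derives alignment from known-zero low bits: if ComputeMaskedBits proves the pointer's low k bits are zero, the value is 2^k-aligned. A standalone sketch of just that arithmetic, with a plain integer standing in for the APInt known-bits result:

    #include <algorithm>
    #include <climits>

    // TrailZ plays the role of KnownZero.countTrailingOnes() in the patch.
    static unsigned alignFromTrailingZeros(unsigned TrailZ, unsigned BitWidth) {
      // Clamp first: a null pointer makes every bit known zero, and shifting
      // by >= the width of unsigned would be undefined behavior.
      TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
      return 1u << std::min(BitWidth - 1, TrailZ);
    }
    // e.g. a pointer with 4 known-zero low bits yields alignment 16.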
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index b3c4801..2462630 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -37,7 +37,7 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loopsimplify"
+#define DEBUG_TYPE "loop-simplify"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
@@ -46,9 +46,10 @@
#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
@@ -65,7 +66,9 @@ STATISTIC(NumNested , "Number of nested loops split out");
namespace {
struct LoopSimplify : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LoopSimplify() : LoopPass(ID) {}
+ LoopSimplify() : LoopPass(ID) {
+ initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+ }
// AA - If we have an alias analysis object to update, this is it, otherwise
// this is null.
@@ -87,8 +90,6 @@ namespace {
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
- AU.addPreserved<DominanceFrontier>();
- AU.addPreservedID(LCSSAID);
}
/// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
@@ -107,8 +108,12 @@ namespace {
}
char LoopSimplify::ID = 0;
-INITIALIZE_PASS(LoopSimplify, "loopsimplify",
- "Canonicalize natural loops", true, false);
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
// Publicly exposed interface to pass...
char &llvm::LoopSimplifyID = LoopSimplify::ID;
@@ -157,9 +162,8 @@ ReprocessLoop:
for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
E = BadPreds.end(); I != E; ++I) {
- DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor ";
- WriteAsOperand(dbgs(), *I, false);
- dbgs() << "\n");
+ DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+ << (*I)->getName() << "\n");
// Inform each successor of each dead pred.
for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
@@ -184,9 +188,8 @@ ReprocessLoop:
if (BI->isConditional()) {
if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
- DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in ";
- WriteAsOperand(dbgs(), *I, false);
- dbgs() << "\n");
+ DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+ << (*I)->getName() << "\n");
BI->setCondition(ConstantInt::get(Cond->getType(),
!L->contains(BI->getSuccessor(0))));
@@ -262,8 +265,9 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = PN->hasConstantValue(DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, DT)) {
if (AA) AA->deleteValue(PN);
+ if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
}
@@ -317,29 +321,22 @@ ReprocessLoop:
if (!FoldBranchToCommonDest(BI)) continue;
// Success. The block is now dead, so remove it from the loop,
- // update the dominator tree and dominance frontier, and delete it.
-
- DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block ";
- WriteAsOperand(dbgs(), ExitingBlock, false);
- dbgs() << "\n");
+ // update the dominator tree and delete it.
+ DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+ << ExitingBlock->getName() << "\n");
assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
Changed = true;
LI->removeBlock(ExitingBlock);
- DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
DomTreeNode *Node = DT->getNode(ExitingBlock);
const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
Node->getChildren();
while (!Children.empty()) {
DomTreeNode *Child = Children.front();
DT->changeImmediateDominator(Child, Node->getIDom());
- if (DF) DF->changeImmediateDominator(Child->getBlock(),
- Node->getIDom()->getBlock(),
- DT);
}
DT->eraseNode(ExitingBlock);
- if (DF) DF->removeBlock(ExitingBlock);
BI->getSuccessor(0)->removePredecessor(ExitingBlock);
BI->getSuccessor(1)->removePredecessor(ExitingBlock);
@@ -378,9 +375,8 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
".preheader", this);
- DEBUG(dbgs() << "LoopSimplify: Creating pre-header ";
- WriteAsOperand(dbgs(), NewBB, false);
- dbgs() << "\n");
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
+ << "\n");
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -409,10 +405,8 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
LoopBlocks.size(), ".loopexit",
this);
- DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block ";
- WriteAsOperand(dbgs(), NewBB, false);
- dbgs() << "\n");
-
+ DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
+ << NewBB->getName() << "\n");
return NewBB;
}
@@ -438,11 +432,11 @@ static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a
/// PHI node that tells us how to partition the loops.
static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
- AliasAnalysis *AA) {
+ AliasAnalysis *AA, LoopInfo *LI) {
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = PN->hasConstantValue(DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, DT)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -516,7 +510,7 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
/// created.
///
Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
- PHINode *PN = FindPHIToPartitionLoops(L, DT, AA);
+ PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
if (PN == 0) return 0; // No known way to partition.
// Pull out all predecessors that have varying values in the loop. This
@@ -643,9 +637,8 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
Header->getName()+".backedge", F);
BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
- DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block ";
- WriteAsOperand(dbgs(), BEBlock, false);
- dbgs() << "\n");
+ DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
+ << BEBlock->getName() << "\n");
// Move the new backedge block to right after the last backedge block.
Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
@@ -721,8 +714,6 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
// Update dominator information
DT->splitBlock(BEBlock);
- if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>())
- DF->splitBlock(BEBlock);
return BEBlock;
}
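Several files in this patch (LoopSimplify above, Local.cpp, LoopUnroll.cpp, PromoteMemoryToRegister.cpp) retire PHINode::hasConstantValue in favor of the more general SimplifyInstruction, which also folds PHIs whose incoming values are all identical or undef. A sketch of the replacement pattern, assuming the TargetData and DominatorTree arguments may both be null, matching SimplifyInstruction's defaults:

    #include "llvm/Analysis/InstructionSimplify.h"
    using namespace llvm;

    // Sketch of the hasConstantValue -> SimplifyInstruction migration.
    static bool foldTrivialPHI(PHINode *PN, const TargetData *TD,
                               DominatorTree *DT) {
      if (Value *V = SimplifyInstruction(PN, TD, DT)) {
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        return true;
      }
      return false;
    }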
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 236bbe9..7da7271 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -16,13 +16,14 @@
//
// The process of unrolling can produce extraneous basic blocks linked with
// unconditional branches. This will be corrected in the future.
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-unroll"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include "llvm/BasicBlock.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/Debug.h"
@@ -30,20 +31,19 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
-
using namespace llvm;
// TODO: Should these be here or in LoopUnroll?
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
-STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
/// RemapInstruction - Convert the instruction operands from referencing the
/// current values into those specified by VMap.
static inline void RemapInstruction(Instruction *I,
- ValueMap<const Value *, Value*> &VMap) {
+ ValueToValueMapTy &VMap) {
for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
Value *Op = I->getOperand(op);
- ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+ ValueToValueMapTy::iterator It = VMap.find(Op);
if (It != VMap.end())
I->setOperand(op, It->second);
}
@@ -96,7 +96,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
}
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
-/// if unrolling was succesful, or false if the loop was unmodified. Unrolling
+/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
@@ -105,7 +105,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
-bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) {
+bool llvm::UnrollLoop(Loop *L, unsigned Count,
+ LoopInfo *LI, LPPassManager *LPM) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -127,6 +128,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
" Can't unroll; loop not terminated by a conditional branch.\n");
return false;
}
+
+ if (Header->hasAddressTaken()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ DEBUG(dbgs() <<
+ " Won't unroll loop: address of header block is taken.\n");
+ return false;
+ }
// Notify ScalarEvolution that the loop will be substantially changed,
// if not outright eliminated.
@@ -189,7 +197,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
// For the first iteration of the loop, we should use the precloned values for
// PHI nodes. Insert associations now.
- typedef ValueMap<const Value*, Value*> ValueToValueMapTy;
ValueToValueMapTy LastValueMap;
std::vector<PHINode*> OrigPHINode;
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
@@ -274,7 +281,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
for (unsigned i = 0; i < NewBlocks.size(); ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- RemapInstruction(I, LastValueMap);
+ ::RemapInstruction(I, LastValueMap);
}
// The latch block exits the loop. If there are any PHI nodes in the
@@ -342,7 +349,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
// iteration.
Term->setSuccessor(!ContinueOnTrue, Dest);
} else {
- Term->setUnconditionalDest(Dest);
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest, Term);
+ Term->eraseFromParent();
// Merge adjacent basic blocks, if possible.
if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
@@ -362,10 +371,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
if (isInstructionTriviallyDead(Inst))
(*BB)->getInstList().erase(Inst);
- else if (Constant *C = ConstantFoldInstruction(Inst)) {
- Inst->replaceAllUsesWith(C);
- (*BB)->getInstList().erase(Inst);
- }
+ else if (Value *V = SimplifyInstruction(Inst))
+ if (LI->replacementPreservesLCSSAForm(Inst, V)) {
+ Inst->replaceAllUsesWith(V);
+ (*BB)->getInstList().erase(Inst);
+ }
}
NumCompletelyUnrolled += CompletelyUnroll;
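Both this file and Local.cpp replace the removed BranchInst::setUnconditionalDest with the same two-step rewrite: create a fresh unconditional branch in front of the old terminator, then erase the old one. The call sites first removePredecessor() the dropped successor so its PHI nodes stay consistent. A minimal sketch:

    #include "llvm/Instructions.h"
    using namespace llvm;

    // Sketch of the terminator rewrite used in LoopUnroll.cpp and Local.cpp.
    static void makeUnconditional(BranchInst *BI, BasicBlock *Dest) {
      BranchInst::Create(Dest, BI);  // insert "br label %Dest" before BI
      BI->eraseFromParent();         // then delete the conditional branch
    }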
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index a46dd84..025ae0d 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -79,7 +79,9 @@ namespace {
explicit LowerInvoke(const TargetLowering *tli = NULL,
bool useExpensiveEHSupport = ExpensiveEHSupport)
: FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
- TLI(tli) { }
+ TLI(tli) {
+ initializeLowerInvokePass(*PassRegistry::getPassRegistry());
+ }
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
@@ -102,7 +104,7 @@ namespace {
char LowerInvoke::ID = 0;
INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
"Lower invoke and unwind, for unwindless code generators",
- false, false);
+ false, false)
char &llvm::LowerInvokePassID = LowerInvoke::ID;
@@ -148,19 +150,20 @@ bool LowerInvoke::doInitialization(Module &M) {
"llvm.sjljeh.jblist");
}
-// VisualStudio defines setjmp as _setjmp via #include <csetjmp> / <setjmp.h>,
-// so it looks like Intrinsic::_setjmp
-#if defined(_MSC_VER) && defined(setjmp)
-#define setjmp_undefined_for_visual_studio
-#undef setjmp
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
#endif
SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_visual_studio)
-// let's return it to _setjmp state in case anyone ever needs it after this
-// point under VisualStudio
-#define setjmp _setjmp
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
#endif
LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
@@ -186,6 +189,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
II->replaceAllUsesWith(NewCall);
// Insert an unconditional branch to the normal destination.
@@ -266,6 +270,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
II->replaceAllUsesWith(NewCall);
// Replace the invoke with an uncond branch.
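The setjmp dance in LowerInvoke::doInitialization now uses #pragma push_macro/pop_macro instead of the old #define/#undef round-trip, so whatever definition MSVC's <setjmp.h> installed is restored verbatim rather than being hard-coded back to _setjmp. The general shape of the idiom:

    // push_macro/pop_macro is supported by MSVC and, for compatibility, by GCC.
    #pragma push_macro("setjmp")
    #undef setjmp
    // ... refer to the plain identifier 'setjmp' here ...
    #pragma pop_macro("setjmp")  // the header's macro, if any, is restored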
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 5530b47..914a439 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -33,7 +33,9 @@ namespace {
class LowerSwitch : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- LowerSwitch() : FunctionPass(ID) {}
+ LowerSwitch() : FunctionPass(ID) {
+ initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -80,7 +82,7 @@ namespace {
char LowerSwitch::ID = 0;
INITIALIZE_PASS(LowerSwitch, "lowerswitch",
- "Lower SwitchInst's to branches", false, false);
+ "Lower SwitchInst's to branches", false, false)
// Publicly exposed interface to pass...
char &llvm::LowerSwitchID = LowerSwitch::ID;
@@ -107,7 +109,8 @@ bool LowerSwitch::runOnFunction(Function &F) {
// operator<< - Used for debugging purposes.
//
static raw_ostream& operator<<(raw_ostream &O,
- const LowerSwitch::CaseVector &C) ATTRIBUTE_USED;
+ const LowerSwitch::CaseVector &C)
+ LLVM_ATTRIBUTE_USED;
static raw_ostream& operator<<(raw_ostream &O,
const LowerSwitch::CaseVector &C) {
O << "[";
diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
index 101645b..f4ca81a 100644
--- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -27,18 +27,17 @@ STATISTIC(NumPromoted, "Number of alloca's promoted");
namespace {
struct PromotePass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- PromotePass() : FunctionPass(ID) {}
+ PromotePass() : FunctionPass(ID) {
+ initializePromotePassPass(*PassRegistry::getPassRegistry());
+ }
// runOnFunction - To run this pass, first we calculate the alloca
// instructions that are safe for promotion, then we promote each one.
//
virtual bool runOnFunction(Function &F);
- // getAnalysisUsage - We need dominance frontiers
- //
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
- AU.addRequired<DominanceFrontier>();
AU.setPreservesCFG();
// This is a cluster of orthogonal Transforms
AU.addPreserved<UnifyFunctionExitNodes>();
@@ -49,8 +48,11 @@ namespace {
} // end of anonymous namespace
char PromotePass::ID = 0;
-INITIALIZE_PASS(PromotePass, "mem2reg", "Promote Memory to Register",
- false, false);
+INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false)
bool PromotePass::runOnFunction(Function &F) {
std::vector<AllocaInst*> Allocas;
@@ -60,7 +62,6 @@ bool PromotePass::runOnFunction(Function &F) {
bool Changed = false;
DominatorTree &DT = getAnalysis<DominatorTree>();
- DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
while (1) {
Allocas.clear();
@@ -74,7 +75,7 @@ bool PromotePass::runOnFunction(Function &F) {
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT, DF);
+ PromoteMemToReg(Allocas, DT);
NumPromoted += Allocas.size();
Changed = true;
}
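With the DominanceFrontier requirement gone, mem2reg's driver shrinks to the loop shown above, and callers of the underlying utility now need only a DominatorTree. A hedged sketch of driving PromoteMemToReg directly under the post-patch interface (the AliasSetTracker parameter is left at its null default):

    #include "llvm/Instructions.h"
    #include "llvm/Transforms/Utils/PromoteMemToReg.h"
    #include <vector>
    using namespace llvm;

    // Promote every promotable alloca in the entry block.
    static void promoteEntryAllocas(Function &F, DominatorTree &DT) {
      std::vector<AllocaInst*> Allocas;
      BasicBlock &Entry = F.getEntryBlock();
      for (BasicBlock::iterator I = Entry.begin(), E = Entry.end(); I != E; ++I)
        if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
          if (isAllocaPromotable(AI))
            Allocas.push_back(AI);
      if (!Allocas.empty())
        PromoteMemToReg(Allocas, DT);
    }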
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index a4e3029..e6a4373 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -9,10 +9,19 @@
//
// This file promotes memory references to be register references. It promotes
// alloca instructions which only have loads and stores as uses. An alloca is
-// transformed by using dominator frontiers to place PHI nodes, then traversing
-// the function in depth-first order to rewrite loads and stores as appropriate.
-// This is just the standard SSA construction algorithm to construct "pruned"
-// SSA form.
+// transformed by using iterated dominator frontiers to place PHI nodes, then
+// traversing the function in depth-first order to rewrite loads and stores as
+// appropriate.
+//
+// The algorithm used here is based on:
+//
+// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
+// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
+// Programming Languages
+// POPL '95. ACM, New York, NY, 62-73.
+//
+// It has been modified to not explicitly use the DJ graph data structure and to
+// directly compute pruned SSA using per-variable liveness information.
//
//===----------------------------------------------------------------------===//
@@ -24,9 +33,10 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Metadata.h"
+#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -34,6 +44,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
#include <algorithm>
+#include <map>
+#include <queue>
using namespace llvm;
STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
@@ -178,7 +190,6 @@ namespace {
///
std::vector<AllocaInst*> Allocas;
DominatorTree &DT;
- DominanceFrontier &DF;
DIFactory *DIF;
/// AST - An AliasSetTracker object to update. If null, don't update it.
@@ -187,7 +198,7 @@ namespace {
/// AllocaLookup - Reverse mapping of Allocas.
///
- std::map<AllocaInst*, unsigned> AllocaLookup;
+ DenseMap<AllocaInst*, unsigned> AllocaLookup;
/// NewPhiNodes - The PhiNodes we're adding.
///
@@ -216,12 +227,15 @@ namespace {
/// non-deterministic behavior.
DenseMap<BasicBlock*, unsigned> BBNumbers;
+ /// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
+ DenseMap<DomTreeNode*, unsigned> DomLevels;
+
/// BBNumPreds - Lazily compute the number of predecessors a block has.
DenseMap<const BasicBlock*, unsigned> BBNumPreds;
public:
PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
- DominanceFrontier &df, AliasSetTracker *ast)
- : Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {}
+ AliasSetTracker *ast)
+ : Allocas(A), DT(dt), DIF(0), AST(ast) {}
~PromoteMem2Reg() {
delete DIF;
}
@@ -264,13 +278,12 @@ namespace {
void RenamePass(BasicBlock *BB, BasicBlock *Pred,
RenamePassData::ValVector &IncVals,
std::vector<RenamePassData> &Worklist);
- bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version,
- SmallPtrSet<PHINode*, 16> &InsertedPHINodes);
+ bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};
struct AllocaInfo {
- std::vector<BasicBlock*> DefiningBlocks;
- std::vector<BasicBlock*> UsingBlocks;
+ SmallVector<BasicBlock*, 32> DefiningBlocks;
+ SmallVector<BasicBlock*, 32> UsingBlocks;
StoreInst *OnlyStore;
BasicBlock *OnlyBlock;
@@ -325,11 +338,19 @@ namespace {
DbgDeclare = FindAllocaDbgDeclare(AI);
}
};
+
+ typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;
+
+ struct DomTreeNodeCompare {
+ bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
+ return LHS.second < RHS.second;
+ }
+ };
} // end of anonymous namespace
void PromoteMem2Reg::run() {
- Function &F = *DF.getRoot()->getParent();
+ Function &F = *DT.getRoot()->getParent();
if (AST) PointerAllocaValues.resize(Allocas.size());
AllocaDbgDeclares.resize(Allocas.size());
@@ -422,7 +443,26 @@ void PromoteMem2Reg::run() {
continue;
}
}
-
+
+ // If we haven't computed dominator tree levels, do so now.
+ if (DomLevels.empty()) {
+ SmallVector<DomTreeNode*, 32> Worklist;
+
+ DomTreeNode *Root = DT.getRootNode();
+ DomLevels[Root] = 0;
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ unsigned ChildLevel = DomLevels[Node] + 1;
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
+ CI != CE; ++CI) {
+ DomLevels[*CI] = ChildLevel;
+ Worklist.push_back(*CI);
+ }
+ }
+ }
+
// If we haven't computed a numbering for the BB's in the function, do so
// now.
if (BBNumbers.empty()) {
@@ -484,9 +524,8 @@ void PromoteMem2Reg::run() {
Instruction *A = Allocas[i];
// If there are any uses of the alloca instructions left, they must be in
- // sections of dead code that were not processed on the dominance frontier.
- // Just delete the users now.
- //
+ // unreachable basic blocks that were not processed by walking the dominator
+ // tree. Just delete the users now.
if (!A->use_empty())
A->replaceAllUsesWith(UndefValue::get(A->getType()));
if (AST) AST->deleteValue(A);
@@ -509,9 +548,9 @@ void PromoteMem2Reg::run() {
for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
PHINode *PN = I->second;
-
+
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = PN->hasConstantValue(&DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
@@ -663,7 +702,6 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
/// avoiding insertion of dead phi nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
AllocaInfo &Info) {
-
// Unique the set of defining blocks for efficient lookup.
SmallPtrSet<BasicBlock*, 32> DefBlocks;
DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
@@ -673,47 +711,78 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
- // Compute the locations where PhiNodes need to be inserted. Look at the
- // dominance frontier of EACH basic-block we have a write in.
- unsigned CurrentVersion = 0;
- SmallPtrSet<PHINode*, 16> InsertedPHINodes;
- std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
- while (!Info.DefiningBlocks.empty()) {
- BasicBlock *BB = Info.DefiningBlocks.back();
- Info.DefiningBlocks.pop_back();
-
- // Look up the DF for this write, add it to defining blocks.
- DominanceFrontier::const_iterator it = DF.find(BB);
- if (it == DF.end()) continue;
-
- const DominanceFrontier::DomSetType &S = it->second;
-
- // In theory we don't need the indirection through the DFBlocks vector.
- // In practice, the order of calling QueuePhiNode would depend on the
- // (unspecified) ordering of basic blocks in the dominance frontier,
- // which would give PHI nodes non-deterministic subscripts. Fix this by
- // processing blocks in order of their occurrence in the function.
- for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
- PE = S.end(); P != PE; ++P) {
- // If the frontier block is not in the live-in set for the alloca, don't
- // bother processing it.
- if (!LiveInBlocks.count(*P))
- continue;
-
- DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
- }
-
- // Sort by the block ordering in the function.
- if (DFBlocks.size() > 1)
- std::sort(DFBlocks.begin(), DFBlocks.end());
-
- for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
- BasicBlock *BB = DFBlocks[i].second;
- if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes))
- Info.DefiningBlocks.push_back(BB);
+ // Use a priority queue keyed on dominator tree level so that inserted nodes
+ // are handled from the bottom of the dominator tree upwards.
+ typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+ DomTreeNodeCompare> IDFPriorityQueue;
+ IDFPriorityQueue PQ;
+
+ for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
+ E = DefBlocks.end(); I != E; ++I) {
+ if (DomTreeNode *Node = DT.getNode(*I))
+ PQ.push(std::make_pair(Node, DomLevels[Node]));
+ }
+
+ SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
+ SmallPtrSet<DomTreeNode*, 32> Visited;
+ SmallVector<DomTreeNode*, 32> Worklist;
+ while (!PQ.empty()) {
+ DomTreeNodePair RootPair = PQ.top();
+ PQ.pop();
+ DomTreeNode *Root = RootPair.first;
+ unsigned RootLevel = RootPair.second;
+
+ // Walk all dominator tree children of Root, inspecting their CFG edges with
+ // targets elsewhere on the dominator tree. Only targets whose level is at
+ // most Root's level are added to the iterated dominance frontier of the
+ // definition set.
+
+ Worklist.clear();
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ BasicBlock *BB = Node->getBlock();
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+ ++SI) {
+ DomTreeNode *SuccNode = DT.getNode(*SI);
+
+ // Quickly skip all CFG edges that are also dominator tree edges instead
+ // of catching them below.
+ if (SuccNode->getIDom() == Node)
+ continue;
+
+ unsigned SuccLevel = DomLevels[SuccNode];
+ if (SuccLevel > RootLevel)
+ continue;
+
+ if (!Visited.insert(SuccNode))
+ continue;
+
+ BasicBlock *SuccBB = SuccNode->getBlock();
+ if (!LiveInBlocks.count(SuccBB))
+ continue;
+
+ DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
+ if (!DefBlocks.count(SuccBB))
+ PQ.push(std::make_pair(SuccNode, SuccLevel));
+ }
+
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
+ ++CI) {
+ if (!Visited.count(*CI))
+ Worklist.push_back(*CI);
+ }
}
- DFBlocks.clear();
}
+
+ if (DFBlocks.size() > 1)
+ std::sort(DFBlocks.begin(), DFBlocks.end());
+
+ unsigned CurrentVersion = 0;
+ for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
+ QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}
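
The loop above is the standard worklist formulation of iterated dominance
frontiers: roots come off a max-priority queue deepest level first, and a CFG
edge leaving a root's dominator subtree is a frontier edge exactly when its
target sits at a level no deeper than the root. The toy below replays the same
loop on a hard-coded diamond CFG; it is a self-contained sketch (plain STL,
integer block ids, liveness pruning omitted), not code from this patch:

  #include <cstdio>
  #include <queue>
  #include <set>
  #include <utility>
  #include <vector>

  int main() {
    // CFG: 0 -> {1,2}, 1 -> 3, 2 -> 3. Dom tree: 0 immediately dominates
    // 1, 2 and 3; levels are 0 for block 0 and 1 for the rest.
    std::vector<std::vector<int> > succ(4), domChildren(4);
    succ[0].push_back(1); succ[0].push_back(2);
    succ[1].push_back(3); succ[2].push_back(3);
    domChildren[0].push_back(1); domChildren[0].push_back(2);
    domChildren[0].push_back(3);
    int idom[4]  = {-1, 0, 0, 0};
    int level[4] = { 0, 1, 1, 1};

    std::set<int> defs; defs.insert(1);          // a store in block 1
    std::priority_queue<std::pair<int,int> > pq; // (level, block), max-heap
    for (std::set<int>::iterator I = defs.begin(); I != defs.end(); ++I)
      pq.push(std::make_pair(level[*I], *I));

    std::set<int> visited, idf;
    while (!pq.empty()) {
      int rootLevel = pq.top().first, root = pq.top().second; pq.pop();
      std::vector<int> work(1, root);
      while (!work.empty()) {
        int n = work.back(); work.pop_back();
        for (size_t i = 0; i != succ[n].size(); ++i) {
          int s = succ[n][i];
          if (idom[s] == n) continue;         // dominator-tree edge, skip
          if (level[s] > rootLevel) continue; // deeper than root, not IDF
          if (!visited.insert(s).second) continue;
          idf.insert(s);
          if (!defs.count(s)) pq.push(std::make_pair(level[s], s));
        }
        for (size_t i = 0; i != domChildren[n].size(); ++i)
          if (!visited.count(domChildren[n][i]))
            work.push_back(domChildren[n][i]);
      }
    }
    for (std::set<int>::iterator I = idf.begin(); I != idf.end(); ++I)
      std::printf("phi needed in block %d\n", *I); // prints: block 3
    return 0;
  }
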
/// RewriteSingleStoreAlloca - If there is only a single store to this value,
@@ -900,8 +969,7 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
// Alloca returns true if there wasn't already a phi-node for that variable
//
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
- unsigned &Version,
- SmallPtrSet<PHINode*, 16> &InsertedPHINodes) {
+ unsigned &Version) {
// Look up the basic-block in question.
PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];
@@ -916,8 +984,6 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
++NumPHIInsert;
PhiToAllocaMap[PN] = AllocaNo;
PN->reserveOperandSpace(getNumPreds(BB));
-
- InsertedPHINodes.insert(PN);
if (AST && PN->getType()->isPointerTy())
AST->copyValue(PointerAllocaValues[AllocaNo], PN);
@@ -986,7 +1052,7 @@ NextIteration:
AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
if (!Src) continue;
- std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
+ DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
if (AI == AllocaLookup.end()) continue;
Value *V = IncomingVals[AI->second];
@@ -1002,7 +1068,7 @@ NextIteration:
AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
if (!Dest) continue;
- std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
+ DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
if (ai == AllocaLookup.end())
continue;
@@ -1036,18 +1102,17 @@ NextIteration:
}
/// PromoteMemToReg - Promote the specified list of alloca instructions into
-/// scalar registers, inserting PHI nodes as appropriate. This function makes
-/// use of DominanceFrontier information. This function does not modify the CFG
-/// of the function at all. All allocas must be from the same function.
+/// scalar registers, inserting PHI nodes as appropriate. This function does
+/// not modify the CFG of the function at all. All allocas must be from the
+/// same function.
///
/// If AST is specified, the specified tracker is updated to reflect changes
/// made to the IR.
///
void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
- DominatorTree &DT, DominanceFrontier &DF,
- AliasSetTracker *AST) {
+ DominatorTree &DT, AliasSetTracker *AST) {
// If there is nothing to do, bail out...
if (Allocas.empty()) return;
- PromoteMem2Reg(Allocas, DT, DF, AST).run();
+ PromoteMem2Reg(Allocas, DT, AST).run();
}
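
With the DominanceFrontier parameter gone, a caller now only needs a
DominatorTree. A hedged usage sketch (the helper and its entry-block-only
policy are ours; isAllocaPromotable is the companion predicate declared next
to PromoteMemToReg):

  #include <vector>
  #include "llvm/Function.h"
  #include "llvm/Instructions.h"
  #include "llvm/Analysis/Dominators.h"
  #include "llvm/Transforms/Utils/PromoteMemToReg.h"
  using namespace llvm;

  static bool PromoteEntryAllocas(Function &F, DominatorTree &DT) {
    std::vector<AllocaInst*> Allocas;
    BasicBlock &Entry = F.getEntryBlock();
    for (BasicBlock::iterator I = Entry.begin(), E = Entry.end(); I != E; ++I)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
        if (isAllocaPromotable(AI))
          Allocas.push_back(AI);
    if (Allocas.empty()) return false;
    PromoteMemToReg(Allocas, DT, /*AST=*/0); // no DominanceFrontier required
    return true;
  }
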
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index c855988..3896d98 100644
--- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "ssaupdater"
#include "llvm/Instructions.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CFG.h"
@@ -178,9 +179,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// See if the PHI node can be merged to a single value. This can happen in
// loop cases when we get a PHI of itself and one other value.
- if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
+ if (Value *V = SimplifyInstruction(InsertedPHI)) {
InsertedPHI->eraseFromParent();
- return ConstVal;
+ return V;
}
// If the client wants to know about all new instructions, tell it.
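
GetValueInMiddleOfBlock now folds its own freshly inserted PHIs with
SimplifyInstruction, which covers the common loop case of a PHI merging one
real value with itself. A small client-side sketch with hypothetical block
arguments (the SSAUpdater calls match the signatures used in this file):

  #include "llvm/Transforms/Utils/SSAUpdater.h"
  using namespace llvm;

  // Make V, defined in DefBB, available inside UseBB. If the only PHI
  // SSAUpdater would need merges V with itself, it now folds away and V
  // is returned directly.
  static Value *GetAvailableIn(Value *V, BasicBlock *DefBB, BasicBlock *UseBB) {
    SSAUpdater SSA;
    SSA.Initialize(V->getType(), V->getName());
    SSA.AddAvailableValue(DefBB, V);
    return SSA.GetValueInMiddleOfBlock(UseBB);
  }
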
@@ -342,3 +343,169 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
return Impl.GetValue(BB);
}
+
+//===----------------------------------------------------------------------===//
+// LoadAndStorePromoter Implementation
+//===----------------------------------------------------------------------===//
+
+LoadAndStorePromoter::
+LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
+ SSAUpdater &S, StringRef BaseName) : SSA(S) {
+ if (Insts.empty()) return;
+
+ Value *SomeVal;
+ if (LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
+ SomeVal = LI;
+ else
+ SomeVal = cast<StoreInst>(Insts[0])->getOperand(0);
+
+ if (BaseName.empty())
+ BaseName = SomeVal->getName();
+ SSA.Initialize(SomeVal->getType(), BaseName);
+}
+
+
+void LoadAndStorePromoter::
+run(const SmallVectorImpl<Instruction*> &Insts) const {
+
+ // First step: bucket up uses of the alloca by the block they occur in.
+ // This is important because we have to handle multiple defs/uses in a block
+ // ourselves: SSAUpdater is purely for cross-block references.
+ // FIXME: Want a TinyVector<Instruction*> since there is often 0/1 element.
+ DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
+
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+ UsesByBlock[User->getParent()].push_back(User);
+ }
+
+ // Okay, now we can iterate over all the blocks in the function with uses,
+ // processing them. Keep track of which loads are loading a live-in value.
+ // Walk the uses in the use-list order to be deterministic.
+ SmallVector<LoadInst*, 32> LiveInLoads;
+ DenseMap<Value*, Value*> ReplacedLoads;
+
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+ BasicBlock *BB = User->getParent();
+ std::vector<Instruction*> &BlockUses = UsesByBlock[BB];
+
+ // If this block has already been processed, ignore this repeat use.
+ if (BlockUses.empty()) continue;
+
+ // Okay, this is the first use in the block. If this block just has a
+ // single user in it, we can rewrite it trivially.
+ if (BlockUses.size() == 1) {
+ // If it is a store, it is a trivial def of the value in the block.
+ if (StoreInst *SI = dyn_cast<StoreInst>(User))
+ SSA.AddAvailableValue(BB, SI->getOperand(0));
+ else
+ // Otherwise it is a load, queue it to rewrite as a live-in load.
+ LiveInLoads.push_back(cast<LoadInst>(User));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, check to see if this block is all loads.
+ bool HasStore = false;
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
+ if (isa<StoreInst>(BlockUses[i])) {
+ HasStore = true;
+ break;
+ }
+ }
+
+ // If so, we can queue them all as live in loads. We don't have an
+ // efficient way to tell which one is first in the block and don't want to
+ // scan large blocks, so just add all loads as live ins.
+ if (!HasStore) {
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+ LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+ // Since SSAUpdater is purely for cross-block values, we need to determine
+ // the order of these instructions in the block. If the first use in the
+ // block is a load, then it uses the live in value. The last store defines
+ // the live out value. We handle this by doing a linear scan of the block.
+ Value *StoredValue = 0;
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ // If this is a load from an unrelated pointer, ignore it.
+ if (!isInstInList(L, Insts)) continue;
+
+ // If we haven't seen a store yet, this is a live in use, otherwise
+ // use the stored value.
+ if (StoredValue) {
+ replaceLoadWithValue(L, StoredValue);
+ L->replaceAllUsesWith(StoredValue);
+ ReplacedLoads[L] = StoredValue;
+ } else {
+ LiveInLoads.push_back(L);
+ }
+ continue;
+ }
+
+ if (StoreInst *S = dyn_cast<StoreInst>(II)) {
+ // If this is a store to an unrelated pointer, ignore it.
+ if (!isInstInList(S, Insts)) continue;
+
+ // Remember that this is the active value in the block.
+ StoredValue = S->getOperand(0);
+ }
+ }
+
+ // The last stored value that happened is the live-out for the block.
+ assert(StoredValue && "Already checked that there is a store in block");
+ SSA.AddAvailableValue(BB, StoredValue);
+ BlockUses.clear();
+ }
+
+ // Okay, now we rewrite all loads that use live-in values in the loop,
+ // inserting PHI nodes as necessary.
+ for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
+ LoadInst *ALoad = LiveInLoads[i];
+ Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+ replaceLoadWithValue(ALoad, NewVal);
+
+ // Avoid assertions in unreachable code.
+ if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType());
+ ALoad->replaceAllUsesWith(NewVal);
+ ReplacedLoads[ALoad] = NewVal;
+ }
+
+ // Allow the client to do stuff before we start nuking things.
+ doExtraRewritesBeforeFinalDeletion();
+
+ // Now that everything is rewritten, delete the old instructions from the
+ // function. They should all be dead now.
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+
+ // If this is a load that still has uses, then the load must have been added
+ // as a live value in the SSAUpdater data structure for a block (e.g. because
+ // the loaded value was stored later). In this case, we need to recursively
+ // propagate the updates until we get to the real value.
+ if (!User->use_empty()) {
+ Value *NewVal = ReplacedLoads[User];
+ assert(NewVal && "not a replaced load?");
+
+ // Propagate down to the ultimate replacee. The intermediate loads
+ // could theoretically already have been deleted, so we don't want to
+ // dereference the Value*'s.
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+ while (RLI != ReplacedLoads.end()) {
+ NewVal = RLI->second;
+ RLI = ReplacedLoads.find(NewVal);
+ }
+
+ replaceLoadWithValue(cast<LoadInst>(User), NewVal);
+ User->replaceAllUsesWith(NewVal);
+ }
+
+ instructionDeleted(User);
+ User->eraseFromParent();
+ }
+}
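
LoadAndStorePromoter is meant to be subclassed: isInstInList,
replaceLoadWithValue, instructionDeleted and doExtraRewritesBeforeFinalDeletion,
all invoked from run() above, are the client hooks. A minimal hypothetical
subclass; the hook signature is inferred from the call sites above, so treat
it as an assumption:

  // Hypothetical client: promote the loads/stores of one known pointer.
  class SinglePointerPromoter : public LoadAndStorePromoter {
    const Value *Ptr; // the pointer whose accesses are being rewritten
  public:
    SinglePointerPromoter(const SmallVectorImpl<Instruction*> &Insts,
                          SSAUpdater &S, const Value *P)
      : LoadAndStorePromoter(Insts, S, StringRef()), Ptr(P) {}

    // run() scans whole blocks, so it asks which loads/stores are ours.
    virtual bool isInstInList(Instruction *I,
                              const SmallVectorImpl<Instruction*> &) const {
      if (const LoadInst *LI = dyn_cast<LoadInst>(I))
        return LI->getPointerOperand() == Ptr;
      if (const StoreInst *SI = dyn_cast<StoreInst>(I))
        return SI->getPointerOperand() == Ptr;
      return false;
    }
  };
  // Typical use: SSAUpdater SSA; SinglePointerPromoter(Insts, SSA, P).run(Insts);
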
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 28d7afb..fb660db 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -19,33 +19,34 @@
#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <functional>
#include <set>
#include <map>
using namespace llvm;
+static cl::opt<bool>
+DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
+ cl::desc("Duplicate return instructions into unconditional branches"));
+
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
namespace {
class SimplifyCFGOpt {
const TargetData *const TD;
- ConstantInt *GetConstantInt(Value *V);
- Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values);
- Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values);
- bool GatherValueComparisons(Instruction *Cond, Value *&CompVal,
- std::vector<ConstantInt*> &Values);
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases);
@@ -53,6 +54,14 @@ class SimplifyCFGOpt {
BasicBlock *Pred);
bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI);
+ bool SimplifyReturn(ReturnInst *RI);
+ bool SimplifyUnwind(UnwindInst *UI);
+ bool SimplifyUnreachable(UnreachableInst *UI);
+ bool SimplifySwitch(SwitchInst *SI);
+ bool SimplifyIndirectBr(IndirectBrInst *IBI);
+ bool SimplifyUncondBranch(BranchInst *BI);
+ bool SimplifyCondBranch(BranchInst *BI);
+
public:
explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {}
bool run(BasicBlock *BB);
@@ -91,8 +100,6 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
/// ExistPred, an existing predecessor of Succ.
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
BasicBlock *ExistPred) {
- assert(std::find(succ_begin(ExistPred), succ_end(ExistPred), Succ) !=
- succ_end(ExistPred) && "ExistPred is not a predecessor of Succ!");
if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
PHINode *PN;
@@ -102,28 +109,29 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
}
-/// GetIfCondition - Given a basic block (BB) with two predecessors (and
-/// presumably PHI nodes in it), check to see if the merge at this block is due
+/// GetIfCondition - Given a basic block (BB) with two predecessors (and at
+/// least one PHI node in it), check to see if the merge at this block is due
/// to an "if condition". If so, return the boolean condition that determines
/// which entry into BB will be taken. Also, return by references the block
/// that will be entered from if the condition is true, and the block that will
/// be entered if the condition is false.
///
-///
-static Value *GetIfCondition(BasicBlock *BB,
- BasicBlock *&IfTrue, BasicBlock *&IfFalse) {
- assert(std::distance(pred_begin(BB), pred_end(BB)) == 2 &&
+/// This does no checking to see if the true/false blocks have large or unsavory
+/// instructions in them.
+static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
+ BasicBlock *&IfFalse) {
+ PHINode *SomePHI = cast<PHINode>(BB->begin());
+ assert(SomePHI->getNumIncomingValues() == 2 &&
"Function can only handle blocks with 2 predecessors!");
- BasicBlock *Pred1 = *pred_begin(BB);
- BasicBlock *Pred2 = *++pred_begin(BB);
+ BasicBlock *Pred1 = SomePHI->getIncomingBlock(0);
+ BasicBlock *Pred2 = SomePHI->getIncomingBlock(1);
// We can only handle branches. Other control flow will be lowered to
// branches if possible anyway.
- if (!isa<BranchInst>(Pred1->getTerminator()) ||
- !isa<BranchInst>(Pred2->getTerminator()))
+ BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
+ BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
+ if (Pred1Br == 0 || Pred2Br == 0)
return 0;
- BranchInst *Pred1Br = cast<BranchInst>(Pred1->getTerminator());
- BranchInst *Pred2Br = cast<BranchInst>(Pred2->getTerminator());
// Eliminate code duplication by ensuring that Pred1Br is conditional if
// either are.
@@ -140,6 +148,12 @@ static Value *GetIfCondition(BasicBlock *BB,
}
if (Pred1Br->isConditional()) {
+ // The only thing we have to watch out for here is to make sure that Pred2
+ // doesn't have incoming edges from other blocks. If it does, the condition
+ // doesn't dominate BB.
+ if (Pred2->getSinglePredecessor() == 0)
+ return 0;
+
// If we found a conditional branch predecessor, make sure that it branches
// to BB and Pred2Br. If it doesn't, this isn't an "if statement".
if (Pred1Br->getSuccessor(0) == BB &&
@@ -156,39 +170,29 @@ static Value *GetIfCondition(BasicBlock *BB,
return 0;
}
- // The only thing we have to watch out for here is to make sure that Pred2
- // doesn't have incoming edges from other blocks. If it does, the condition
- // doesn't dominate BB.
- if (++pred_begin(Pred2) != pred_end(Pred2))
- return 0;
-
return Pred1Br->getCondition();
}
// Ok, if we got here, both predecessors end with an unconditional branch to
// BB. Don't panic! If both blocks only have a single (identical)
// predecessor, and THAT is a conditional branch, then we're all ok!
- if (pred_begin(Pred1) == pred_end(Pred1) ||
- ++pred_begin(Pred1) != pred_end(Pred1) ||
- pred_begin(Pred2) == pred_end(Pred2) ||
- ++pred_begin(Pred2) != pred_end(Pred2) ||
- *pred_begin(Pred1) != *pred_begin(Pred2))
+ BasicBlock *CommonPred = Pred1->getSinglePredecessor();
+ if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
return 0;
// Otherwise, if this is a conditional branch, then we can use it!
- BasicBlock *CommonPred = *pred_begin(Pred1);
- if (BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator())) {
- assert(BI->isConditional() && "Two successors but not conditional?");
- if (BI->getSuccessor(0) == Pred1) {
- IfTrue = Pred1;
- IfFalse = Pred2;
- } else {
- IfTrue = Pred2;
- IfFalse = Pred1;
- }
- return BI->getCondition();
+ BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
+ if (BI == 0) return 0;
+
+ assert(BI->isConditional() && "Two successors but not conditional?");
+ if (BI->getSuccessor(0) == Pred1) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
}
- return 0;
+ return BI->getCondition();
}
/// DominatesMergePoint - If we have a merge point of an "if condition" as
@@ -201,7 +205,7 @@ static Value *GetIfCondition(BasicBlock *BB,
/// non-trapping. If both are true, the instruction is inserted into the set
/// and true is returned.
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
- std::set<Instruction*> *AggressiveInsts) {
+ SmallPtrSet<Instruction*, 4> *AggressiveInsts) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
// Non-instructions all dominate instructions, but not all constantexprs
@@ -219,56 +223,55 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// If this instruction is defined in a block that contains an unconditional
// branch to BB, then it must be in the 'conditional' part of the "if
- // statement".
- if (BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()))
- if (BI->isUnconditional() && BI->getSuccessor(0) == BB) {
- if (!AggressiveInsts) return false;
- // Okay, it looks like the instruction IS in the "condition". Check to
- // see if it's a cheap instruction to unconditionally compute, and if it
- // only uses stuff defined outside of the condition. If so, hoist it out.
- if (!I->isSafeToSpeculativelyExecute())
- return false;
+ // statement". If not, it definitely dominates the region.
+ BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
+ if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB)
+ return true;
- switch (I->getOpcode()) {
- default: return false; // Cannot hoist this out safely.
- case Instruction::Load: {
- // We have to check to make sure there are no instructions before the
- // load in its basic block, as we are going to hoist the loop out to
- // its predecessor.
- BasicBlock::iterator IP = PBB->begin();
- while (isa<DbgInfoIntrinsic>(IP))
- IP++;
- if (IP != BasicBlock::iterator(I))
- return false;
- break;
- }
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::ICmp:
- break; // These are all cheap and non-trapping instructions.
- }
+ // If we aren't allowing aggressive promotion anymore, then don't consider
+ // instructions in the 'if region'.
+ if (AggressiveInsts == 0) return false;
+
+ // Okay, it looks like the instruction IS in the "condition". Check to
+ // see if it's a cheap instruction to unconditionally compute, and if it
+ // only uses stuff defined outside of the condition. If so, hoist it out.
+ if (!I->isSafeToSpeculativelyExecute())
+ return false;
- // Okay, we can only really hoist these out if their operands are not
- // defined in the conditional region.
- for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (!DominatesMergePoint(*i, BB, 0))
- return false;
- // Okay, it's safe to do this! Remember this instruction.
- AggressiveInsts->insert(I);
- }
+ switch (I->getOpcode()) {
+ default: return false; // Cannot hoist this out safely.
+ case Instruction::Load:
+ // We have to check to make sure there are no instructions before the
+ // load in its basic block, as we are going to hoist the load out to its
+ // predecessor.
+ if (PBB->getFirstNonPHIOrDbg() != I)
+ return false;
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::ICmp:
+ break; // These are all cheap and non-trapping instructions.
+ }
+ // Okay, we can only really hoist these out if their operands are not
+ // defined in the conditional region.
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (!DominatesMergePoint(*i, BB, 0))
+ return false;
+ // Okay, it's safe to do this! Remember this instruction.
+ AggressiveInsts->insert(I);
return true;
}
/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
-ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) {
+static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) {
// Normal constant int.
ConstantInt *CI = dyn_cast<ConstantInt>(V);
if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
@@ -296,77 +299,94 @@ ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) {
return 0;
}
-/// GatherConstantSetEQs - Given a potentially 'or'd together collection of
-/// icmp_eq instructions that compare a value against a constant, return the
-/// value being compared, and stick the constant into the Values vector.
-Value *SimplifyCFGOpt::
-GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values) {
- if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Inst->getOpcode() == Instruction::ICmp &&
- cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_EQ) {
- if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) {
- Values.push_back(C);
- return Inst->getOperand(0);
- } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) {
- Values.push_back(C);
- return Inst->getOperand(1);
+/// GatherConstantCompares - Given a potentially 'or'd or 'and'd together
+/// collection of icmp eq/ne instructions that compare a value against a
+/// constant, return the value being compared, and stick the constant into the
+/// Values vector.
+static Value *
+GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
+ const TargetData *TD, bool isEQ, unsigned &UsedICmps) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return 0;
+
+ // If this is an icmp against a constant, handle this as one of the cases.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
+ if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
+ if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
+ UsedICmps++;
+ Vals.push_back(C);
+ return I->getOperand(0);
}
- } else if (Inst->getOpcode() == Instruction::Or) {
- if (Value *LHS = GatherConstantSetEQs(Inst->getOperand(0), Values))
- if (Value *RHS = GatherConstantSetEQs(Inst->getOperand(1), Values))
- if (LHS == RHS)
- return LHS;
+
+ // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to
+ // the set.
+ ConstantRange Span =
+ ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
+
+ // If this is an and/!= check then we want to optimize "x ugt 2" into
+ // x != 0 && x != 1.
+ if (!isEQ)
+ Span = Span.inverse();
+
+ // If there are a ton of values, we don't want to make a ginormous switch.
+ if (Span.getSetSize().ugt(8) || Span.isEmptySet() ||
+ // We don't handle wrapped sets yet.
+ Span.isWrappedSet())
+ return 0;
+
+ for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
+ Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
+ UsedICmps++;
+ return I->getOperand(0);
}
+ return 0;
}
- return 0;
-}
+
+ // Otherwise, we can only handle an | or &, depending on isEQ.
+ if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
+ return 0;
+
+ unsigned NumValsBeforeLHS = Vals.size();
+ unsigned UsedICmpsBeforeLHS = UsedICmps;
+ if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD,
+ isEQ, UsedICmps)) {
+ unsigned NumVals = Vals.size();
+ unsigned UsedICmpsBeforeRHS = UsedICmps;
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ isEQ, UsedICmps)) {
+ if (LHS == RHS)
+ return LHS;
+ Vals.resize(NumVals);
+ UsedICmps = UsedICmpsBeforeRHS;
+ }
-/// GatherConstantSetNEs - Given a potentially 'and'd together collection of
-/// setne instructions that compare a value against a constant, return the value
-/// being compared, and stick the constant into the Values vector.
-Value *SimplifyCFGOpt::
-GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values) {
- if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Inst->getOpcode() == Instruction::ICmp &&
- cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_NE) {
- if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) {
- Values.push_back(C);
- return Inst->getOperand(0);
- } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) {
- Values.push_back(C);
- return Inst->getOperand(1);
- }
- } else if (Inst->getOpcode() == Instruction::And) {
- if (Value *LHS = GatherConstantSetNEs(Inst->getOperand(0), Values))
- if (Value *RHS = GatherConstantSetNEs(Inst->getOperand(1), Values))
- if (LHS == RHS)
- return LHS;
+ // The RHS of the or/and can't be folded in and we haven't used "Extra" yet,
+ // set it and return success.
+ if (Extra == 0 || Extra == I->getOperand(1)) {
+ Extra = I->getOperand(1);
+ return LHS;
}
+
+ Vals.resize(NumValsBeforeLHS);
+ UsedICmps = UsedICmpsBeforeLHS;
+ return 0;
}
- return 0;
-}
-
-/// GatherValueComparisons - If the specified Cond is an 'and' or 'or' of a
-/// bunch of comparisons of one value against constants, return the value and
-/// the constants being compared.
-bool SimplifyCFGOpt::GatherValueComparisons(Instruction *Cond, Value *&CompVal,
- std::vector<ConstantInt*> &Values) {
- if (Cond->getOpcode() == Instruction::Or) {
- CompVal = GatherConstantSetEQs(Cond, Values);
-
- // Return true to indicate that the condition is true if the CompVal is
- // equal to one of the constants.
- return true;
- } else if (Cond->getOpcode() == Instruction::And) {
- CompVal = GatherConstantSetNEs(Cond, Values);
-
- // Return false to indicate that the condition is false if the CompVal is
- // equal to one of the constants.
- return false;
+
+ // If the LHS can't be folded in, but Extra is available and RHS can, try to
+ // use LHS as Extra.
+ if (Extra == 0 || Extra == I->getOperand(0)) {
+ Value *OldExtra = Extra;
+ Extra = I->getOperand(0);
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ isEQ, UsedICmps))
+ return RHS;
+ assert(Vals.size() == NumValsBeforeLHS);
+ Extra = OldExtra;
}
- return false;
+
+ return 0;
}
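
The new piece here is the ConstantRange expansion: one inequality such as
"x ult 3" contributes the explicit case values 0, 1 and 2, and the and/!=
form complements the region first. A hedged sketch of just that step (the
helper name and the size-8 profitability cutoff mirror the code above):

  #include <vector>
  #include "llvm/ADT/APInt.h"
  #include "llvm/Instructions.h"          // CmpInst::Predicate
  #include "llvm/Support/ConstantRange.h" // include added by this patch
  using namespace llvm;

  // Enumerate the values x may take for (x Pred C); empty means "give up".
  static std::vector<APInt> ExpandICmpToCases(CmpInst::Predicate Pred,
                                              const APInt &C, bool isEQ) {
    ConstantRange Span = ConstantRange::makeICmpRegion(Pred, C);
    if (!isEQ)                       // the and/!= form wants the complement
      Span = Span.inverse();
    std::vector<APInt> Vals;
    if (Span.getSetSize().ugt(8) || Span.isEmptySet() || Span.isWrappedSet())
      return Vals;                   // too big, empty, or wrapped
    for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
      Vals.push_back(Tmp);           // ICMP_ULT with C==3 yields 0, 1, 2
    return Vals;
  }
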
-
+
static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
Instruction* Cond = 0;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
@@ -374,6 +394,8 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isConditional())
Cond = dyn_cast<Instruction>(BI->getCondition());
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
+ Cond = dyn_cast<Instruction>(IBI->getAddress());
}
TI->eraseFromParent();
@@ -395,7 +417,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
if ((ICI->getPredicate() == ICmpInst::ICMP_EQ ||
ICI->getPredicate() == ICmpInst::ICMP_NE) &&
- GetConstantInt(ICI->getOperand(1)))
+ GetConstantInt(ICI->getOperand(1), TD))
CV = ICI->getOperand(0);
// Unwrap any lossless ptrtoint cast.
@@ -420,7 +442,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
BranchInst *BI = cast<BranchInst>(TI);
ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
- Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1)),
+ Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1), TD),
BI->getSuccessor(ICI->getPredicate() ==
ICmpInst::ICMP_NE)));
return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
@@ -459,8 +481,8 @@ ValuesOverlap(std::vector<std::pair<ConstantInt*, BasicBlock*> > &C1,
}
// Otherwise, just sort both lists and compare element by element.
- std::sort(V1->begin(), V1->end());
- std::sort(V2->begin(), V2->end());
+ array_pod_sort(V1->begin(), V1->end());
+ array_pod_sort(V2->begin(), V2->end());
unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
while (i1 != e1 && i2 != e2) {
if ((*V1)[i1].first == (*V2)[i2].first)
@@ -506,90 +528,87 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
// If we are here, we know that the value is none of those cases listed in
// PredCases. If there are any cases in ThisCases that are in PredCases, we
// can simplify TI.
- if (ValuesOverlap(PredCases, ThisCases)) {
- if (isa<BranchInst>(TI)) {
- // Okay, one of the successors of this condbr is dead. Convert it to an
- // uncond br.
- assert(ThisCases.size() == 1 && "Branch can only have one case!");
- // Insert the new branch.
- Instruction *NI = BranchInst::Create(ThisDef, TI);
- (void) NI;
-
- // Remove PHI node entries for the dead edge.
- ThisCases[0].second->removePredecessor(TI->getParent());
-
- DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
-
- EraseTerminatorInstAndDCECond(TI);
- return true;
-
- } else {
- SwitchInst *SI = cast<SwitchInst>(TI);
- // Okay, TI has cases that are statically dead, prune them away.
- SmallPtrSet<Constant*, 16> DeadCases;
- for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
- DeadCases.insert(PredCases[i].first);
+ if (!ValuesOverlap(PredCases, ThisCases))
+ return false;
+
+ if (isa<BranchInst>(TI)) {
+ // Okay, one of the successors of this condbr is dead. Convert it to an
+ // uncond br.
+ assert(ThisCases.size() == 1 && "Branch can only have one case!");
+ // Insert the new branch.
+ Instruction *NI = BranchInst::Create(ThisDef, TI);
+ (void) NI;
- DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI);
+ // Remove PHI node entries for the dead edge.
+ ThisCases[0].second->removePredecessor(TI->getParent());
- for (unsigned i = SI->getNumCases()-1; i != 0; --i)
- if (DeadCases.count(SI->getCaseValue(i))) {
- SI->getSuccessor(i)->removePredecessor(TI->getParent());
- SI->removeCase(i);
- }
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
- DEBUG(dbgs() << "Leaving: " << *TI << "\n");
- return true;
- }
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
}
-
- } else {
- // Otherwise, TI's block must correspond to some matched value. Find out
- // which value (or set of values) this is.
- ConstantInt *TIV = 0;
- BasicBlock *TIBB = TI->getParent();
+
+ SwitchInst *SI = cast<SwitchInst>(TI);
+ // Okay, TI has cases that are statically dead, prune them away.
+ SmallPtrSet<Constant*, 16> DeadCases;
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
- if (PredCases[i].second == TIBB) {
- if (TIV == 0)
- TIV = PredCases[i].first;
- else
- return false; // Cannot handle multiple values coming to this block.
- }
- assert(TIV && "No edge from pred to succ?");
-
- // Okay, we found the one constant that our value can be if we get into TI's
- // BB. Find out which successor will unconditionally be branched to.
- BasicBlock *TheRealDest = 0;
- for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
- if (ThisCases[i].first == TIV) {
- TheRealDest = ThisCases[i].second;
- break;
+ DeadCases.insert(PredCases[i].first);
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI);
+
+ for (unsigned i = SI->getNumCases()-1; i != 0; --i)
+ if (DeadCases.count(SI->getCaseValue(i))) {
+ SI->getSuccessor(i)->removePredecessor(TI->getParent());
+ SI->removeCase(i);
}
- // If not handled by any explicit cases, it is handled by the default case.
- if (TheRealDest == 0) TheRealDest = ThisDef;
+ DEBUG(dbgs() << "Leaving: " << *TI << "\n");
+ return true;
+ }
+
+ // Otherwise, TI's block must correspond to some matched value. Find out
+ // which value (or set of values) this is.
+ ConstantInt *TIV = 0;
+ BasicBlock *TIBB = TI->getParent();
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second == TIBB) {
+ if (TIV != 0)
+ return false; // Cannot handle multiple values coming to this block.
+ TIV = PredCases[i].first;
+ }
+ assert(TIV && "No edge from pred to succ?");
+
+ // Okay, we found the one constant that our value can be if we get into TI's
+ // BB. Find out which successor will unconditionally be branched to.
+ BasicBlock *TheRealDest = 0;
+ for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
+ if (ThisCases[i].first == TIV) {
+ TheRealDest = ThisCases[i].second;
+ break;
+ }
- // Remove PHI node entries for dead edges.
- BasicBlock *CheckEdge = TheRealDest;
- for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
- if (*SI != CheckEdge)
- (*SI)->removePredecessor(TIBB);
- else
- CheckEdge = 0;
+ // If not handled by any explicit cases, it is handled by the default case.
+ if (TheRealDest == 0) TheRealDest = ThisDef;
- // Insert the new branch.
- Instruction *NI = BranchInst::Create(TheRealDest, TI);
- (void) NI;
+ // Remove PHI node entries for dead edges.
+ BasicBlock *CheckEdge = TheRealDest;
+ for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
+ if (*SI != CheckEdge)
+ (*SI)->removePredecessor(TIBB);
+ else
+ CheckEdge = 0;
- DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+ // Insert the new branch.
+ Instruction *NI = BranchInst::Create(TheRealDest, TI);
+ (void) NI;
- EraseTerminatorInstAndDCECond(TI);
- return true;
- }
- return false;
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
}
namespace {
@@ -603,6 +622,16 @@ namespace {
};
}
+static int ConstantIntSortPredicate(const void *P1, const void *P2) {
+ const ConstantInt *LHS = *(const ConstantInt**)P1;
+ const ConstantInt *RHS = *(const ConstantInt**)P2;
+ if (LHS->getValue().ult(RHS->getValue()))
+ return 1;
+ if (LHS->getValue() == RHS->getValue())
+ return 0;
+ return -1;
+}
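
ConstantIntSortPredicate follows the qsort convention (a negative return
sorts P1 before P2), so as written it orders larger constants first. A
hypothetical call site, relying on the array_pod_sort helper from the
STLExtras.h include added above:

  SmallVector<ConstantInt*, 16> CaseVals;
  // ... collect the switch case values ...
  array_pod_sort(CaseVals.begin(), CaseVals.end(), ConstantIntSortPredicate);
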
+
/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
/// equality comparison instruction (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value comparisons
@@ -798,7 +827,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
if (!I2->use_empty())
I2->replaceAllUsesWith(I1);
I1->intersectOptionalDataWith(I2);
- BB2->getInstList().erase(I2);
+ I2->eraseFromParent();
I1 = BB1_Itr++;
while (isa<DbgInfoIntrinsic>(I1))
@@ -836,18 +865,18 @@ HoistTerminator:
(PN = dyn_cast<PHINode>(BBI)); ++BBI) {
Value *BB1V = PN->getIncomingValueForBlock(BB1);
Value *BB2V = PN->getIncomingValueForBlock(BB2);
- if (BB1V != BB2V) {
- // These values do not agree. Insert a select instruction before NT
- // that determines the right value.
- SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
- if (SI == 0)
- SI = SelectInst::Create(BI->getCondition(), BB1V, BB2V,
- BB1V->getName()+"."+BB2V->getName(), NT);
- // Make the PHI node use the select for all incoming values for BB1/BB2
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
- PN->setIncomingValue(i, SI);
- }
+ if (BB1V == BB2V) continue;
+
+ // These values do not agree. Insert a select instruction before NT
+ // that determines the right value.
+ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
+ if (SI == 0)
+ SI = SelectInst::Create(BI->getCondition(), BB1V, BB2V,
+ BB1V->getName()+"."+BB2V->getName(), NT);
+ // Make the PHI node use the select for all incoming values for BB1/BB2
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
+ PN->setIncomingValue(i, SI);
}
}
@@ -872,21 +901,19 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
BBI != BBE; ++BBI) {
Instruction *I = BBI;
// Skip debug info.
- if (isa<DbgInfoIntrinsic>(I)) continue;
- if (I == Term) break;
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+ if (I == Term) break;
- if (!HInst)
- HInst = I;
- else
+ if (HInst)
return false;
+ HInst = I;
}
if (!HInst)
return false;
// Be conservative for now. FP select instruction can often be expensive.
Value *BrCond = BI->getCondition();
- if (isa<Instruction>(BrCond) &&
- cast<Instruction>(BrCond)->getOpcode() == Instruction::FCmp)
+ if (isa<FCmpInst>(BrCond))
return false;
// If BB1 is actually on the false edge of the conditional branch, remember
@@ -990,12 +1017,12 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
for(Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end();
UI != UE; ++UI) {
Instruction *Use = cast<Instruction>(*UI);
- if (BB1Insns.count(Use)) {
- // If BrCond uses the instruction, place it just before the
- // branch instruction.
- InsertPos = BI;
- break;
- }
+ if (!BB1Insns.count(Use)) continue;
+
+ // If BrCond uses the instruction, place it just before the
+ // branch instruction.
+ InsertPos = BI;
+ break;
}
} else
InsertPos = BI;
@@ -1016,8 +1043,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) {
PHINode *PN = PHIUses[i];
for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j)
- if (PN->getIncomingBlock(j) == BB1 ||
- PN->getIncomingBlock(j) == BIParent)
+ if (PN->getIncomingBlock(j) == BB1 || PN->getIncomingBlock(j) == BIParent)
PN->setIncomingValue(j, SI);
}
@@ -1055,7 +1081,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// that is defined in the same block as the branch and if any PHI entries are
/// constants, thread edges corresponding to that entry to be branches to their
/// ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
@@ -1075,78 +1101,73 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
// Okay, this is a simple enough basic block. See if any phi values are
// constants.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- ConstantInt *CB;
- if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) &&
- CB->getType()->isIntegerTy(1)) {
- // Okay, we now know that all edges from PredBB should be revectored to
- // branch to RealDest.
- BasicBlock *PredBB = PN->getIncomingBlock(i);
- BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+ ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
+ if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue;
+
+ // Okay, we now know that all edges from PredBB should be revectored to
+ // branch to RealDest.
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+
+ if (RealDest == BB) continue; // Skip self loops.
+
+ // The dest block might have PHI nodes, other predecessors and other
+ // difficult cases. Instead of being smart about this, just insert a new
+ // block that jumps to the destination block, effectively splitting
+ // the edge we are about to create.
+ BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
+ RealDest->getName()+".critedge",
+ RealDest->getParent(), RealDest);
+ BranchInst::Create(RealDest, EdgeBB);
+
+ // Update PHI nodes.
+ AddPredecessorToBlock(RealDest, EdgeBB, BB);
+
+ // BB may have instructions that are being threaded over. Clone these
+ // instructions into EdgeBB. We know that there will be no uses of the
+ // cloned instructions outside of EdgeBB.
+ BasicBlock::iterator InsertPt = EdgeBB->begin();
+ DenseMap<Value*, Value*> TranslateMap; // Track translated values.
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
+ continue;
+ }
+ // Clone the instruction.
+ Instruction *N = BBI->clone();
+ if (BBI->hasName()) N->setName(BBI->getName()+".c");
- if (RealDest == BB) continue; // Skip self loops.
+ // Update operands due to translation.
+ for (User::op_iterator i = N->op_begin(), e = N->op_end();
+ i != e; ++i) {
+ DenseMap<Value*, Value*>::iterator PI = TranslateMap.find(*i);
+ if (PI != TranslateMap.end())
+ *i = PI->second;
+ }
- // The dest block might have PHI nodes, other predecessors and other
- // difficult cases. Instead of being smart about this, just insert a new
- // block that jumps to the destination block, effectively splitting
- // the edge we are about to create.
- BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
- RealDest->getName()+".critedge",
- RealDest->getParent(), RealDest);
- BranchInst::Create(RealDest, EdgeBB);
- PHINode *PN;
- for (BasicBlock::iterator BBI = RealDest->begin();
- (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
- Value *V = PN->getIncomingValueForBlock(BB);
- PN->addIncoming(V, EdgeBB);
+ // Check for trivial simplification.
+ if (Value *V = SimplifyInstruction(N, TD)) {
+ TranslateMap[BBI] = V;
+ delete N; // Instruction folded away, don't need actual inst
+ } else {
+ // Insert the new instruction into its new home.
+ EdgeBB->getInstList().insert(InsertPt, N);
+ if (!BBI->use_empty())
+ TranslateMap[BBI] = N;
}
+ }
- // BB may have instructions that are being threaded over. Clone these
- // instructions into EdgeBB. We know that there will be no uses of the
- // cloned instructions outside of EdgeBB.
- BasicBlock::iterator InsertPt = EdgeBB->begin();
- std::map<Value*, Value*> TranslateMap; // Track translated values.
- for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
- if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
- TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
- } else {
- // Clone the instruction.
- Instruction *N = BBI->clone();
- if (BBI->hasName()) N->setName(BBI->getName()+".c");
-
- // Update operands due to translation.
- for (User::op_iterator i = N->op_begin(), e = N->op_end();
- i != e; ++i) {
- std::map<Value*, Value*>::iterator PI =
- TranslateMap.find(*i);
- if (PI != TranslateMap.end())
- *i = PI->second;
- }
-
- // Check for trivial simplification.
- if (Constant *C = ConstantFoldInstruction(N)) {
- TranslateMap[BBI] = C;
- delete N; // Constant folded away, don't need actual inst
- } else {
- // Insert the new instruction into its new home.
- EdgeBB->getInstList().insert(InsertPt, N);
- if (!BBI->use_empty())
- TranslateMap[BBI] = N;
- }
- }
+ // Loop over all of the edges from PredBB to BB, changing them to branch
+ // to EdgeBB instead.
+ TerminatorInst *PredBBTI = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
+ if (PredBBTI->getSuccessor(i) == BB) {
+ BB->removePredecessor(PredBB);
+ PredBBTI->setSuccessor(i, EdgeBB);
}
-
- // Loop over all of the edges from PredBB to BB, changing them to branch
- // to EdgeBB instead.
- TerminatorInst *PredBBTI = PredBB->getTerminator();
- for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
- if (PredBBTI->getSuccessor(i) == BB) {
- BB->removePredecessor(PredBB);
- PredBBTI->setSuccessor(i, EdgeBB);
- }
-
- // Recurse, simplifying any other constants.
- return FoldCondBranchOnPHI(BI) | true;
- }
+
+ // Recurse, simplifying any other constants.
+ return FoldCondBranchOnPHI(BI, TD) | true;
}
return false;
@@ -1154,18 +1175,20 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
/// PHI node, see if we can eliminate it.
-static bool FoldTwoEntryPHINode(PHINode *PN) {
+static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
// subsequently causes this merge to happen. We really want control
// dependence information for this check, but simplifycfg can't keep it up
// to date, and this catches most of the cases we care about anyway.
- //
BasicBlock *BB = PN->getParent();
BasicBlock *IfTrue, *IfFalse;
Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
- if (!IfCond) return false;
+ if (!IfCond ||
+ // Don't bother if the branch will be constant folded trivially.
+ isa<ConstantInt>(IfCond))
+ return false;
// Okay, we found that we can merge this two-entry phi node into a select.
// Doing so would require us to fold *all* two entry phi nodes in this block.
@@ -1177,42 +1200,49 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
if (NumPhis > 2)
return false;
- DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
- << IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
-
// Loop over the PHI's seeing if we can promote them all to select
// instructions. While we are at it, keep track of the instructions
// that need to be moved to the dominating block.
- std::set<Instruction*> AggressiveInsts;
-
- BasicBlock::iterator AfterPHIIt = BB->begin();
- while (isa<PHINode>(AfterPHIIt)) {
- PHINode *PN = cast<PHINode>(AfterPHIIt++);
- if (PN->getIncomingValue(0) == PN->getIncomingValue(1)) {
- if (PN->getIncomingValue(0) != PN)
- PN->replaceAllUsesWith(PN->getIncomingValue(0));
- else
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
- } else if (!DominatesMergePoint(PN->getIncomingValue(0), BB,
- &AggressiveInsts) ||
- !DominatesMergePoint(PN->getIncomingValue(1), BB,
- &AggressiveInsts)) {
- return false;
+ SmallPtrSet<Instruction*, 4> AggressiveInsts;
+
+ for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
+ PHINode *PN = cast<PHINode>(II++);
+ if (Value *V = SimplifyInstruction(PN, TD)) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ continue;
}
+
+ if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts) ||
+ !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts))
+ return false;
}
+ // If we folded the first phi, PN dangles at this point. Refresh it. If
+ // we ran out of PHIs then we simplified them all.
+ PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) return true;
+
+ // Don't fold i1 branches on PHIs which contain binary operators. These can
+ // often be turned into switches and other things.
+ if (PN->getType()->isIntegerTy(1) &&
+ (isa<BinaryOperator>(PN->getIncomingValue(0)) ||
+ isa<BinaryOperator>(PN->getIncomingValue(1)) ||
+ isa<BinaryOperator>(IfCond)))
+ return false;
+
// If all PHI nodes are promotable, check to make sure that all
// instructions in the predecessor blocks can be promoted as well. If
// not, we won't be able to get rid of the control flow, so it's not
// worth promoting to select instructions.
- BasicBlock *DomBlock = 0, *IfBlock1 = 0, *IfBlock2 = 0;
- PN = cast<PHINode>(BB->begin());
- BasicBlock *Pred = PN->getIncomingBlock(0);
- if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) {
- IfBlock1 = Pred;
- DomBlock = *pred_begin(Pred);
- for (BasicBlock::iterator I = Pred->begin();
- !isa<TerminatorInst>(I); ++I)
+ BasicBlock *DomBlock = 0;
+ BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
+ BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
+ if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
+ IfBlock1 = 0;
+ } else {
+ DomBlock = *pred_begin(IfBlock1);
+ for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I)
if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control
@@ -1221,12 +1251,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
}
}
- Pred = PN->getIncomingBlock(1);
- if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) {
- IfBlock2 = Pred;
- DomBlock = *pred_begin(Pred);
- for (BasicBlock::iterator I = Pred->begin();
- !isa<TerminatorInst>(I); ++I)
+ if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
+ IfBlock2 = 0;
+ } else {
+ DomBlock = *pred_begin(IfBlock2);
+ for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I)
if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control
@@ -1234,56 +1263,45 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
return false;
}
}
+
+ DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
+ << IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
// If we can still promote the PHI nodes after this gauntlet of tests,
// do all of the PHI's now.
-
+ Instruction *InsertPt = DomBlock->getTerminator();
+
// Move all 'aggressive' instructions, which are defined in the
// conditional parts of the if's up to the dominating block.
- if (IfBlock1) {
- DomBlock->getInstList().splice(DomBlock->getTerminator(),
- IfBlock1->getInstList(),
- IfBlock1->begin(),
+ if (IfBlock1)
+ DomBlock->getInstList().splice(InsertPt,
+ IfBlock1->getInstList(), IfBlock1->begin(),
IfBlock1->getTerminator());
- }
- if (IfBlock2) {
- DomBlock->getInstList().splice(DomBlock->getTerminator(),
- IfBlock2->getInstList(),
- IfBlock2->begin(),
+ if (IfBlock2)
+ DomBlock->getInstList().splice(InsertPt,
+ IfBlock2->getInstList(), IfBlock2->begin(),
IfBlock2->getTerminator());
- }
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
// Change the PHI node into a select instruction.
- Value *TrueVal =
- PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
- Value *FalseVal =
- PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
+ Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
+ Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
- Value *NV = SelectInst::Create(IfCond, TrueVal, FalseVal, "", AfterPHIIt);
+ Value *NV = SelectInst::Create(IfCond, TrueVal, FalseVal, "", InsertPt);
PN->replaceAllUsesWith(NV);
NV->takeName(PN);
-
- BB->getInstList().erase(PN);
+ PN->eraseFromParent();
}
+
+ // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
+ // has been flattened. Change DomBlock to jump directly to our new block to
+ // avoid other simplifycfg's kicking in on the diamond.
+ TerminatorInst *OldTI = DomBlock->getTerminator();
+ BranchInst::Create(BB, OldTI);
+ OldTI->eraseFromParent();
return true;
}
-/// isTerminatorFirstRelevantInsn - Return true if Term is very first
-/// instruction ignoring Phi nodes and dbg intrinsics.
-static bool isTerminatorFirstRelevantInsn(BasicBlock *BB, Instruction *Term) {
- BasicBlock::iterator BBI = Term;
- while (BBI != BB->begin()) {
- --BBI;
- if (!isa<DbgInfoIntrinsic>(BBI))
- break;
- }
-
- if (isa<PHINode>(BBI) || &*BBI == Term || isa<DbgInfoIntrinsic>(BBI))
- return true;
- return false;
-}
-
/// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes
/// to two returning blocks, try to merge them together into one return,
/// introducing a select if the return values disagree.
@@ -1297,9 +1315,9 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
// Check to ensure both blocks are empty (just a return) or optionally empty
// with PHI nodes. If there are other instructions, merging would cause extra
// computation on one path or the other.
- if (!isTerminatorFirstRelevantInsn(TrueSucc, TrueRet))
+ if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator())
return false;
- if (!isTerminatorFirstRelevantInsn(FalseSucc, FalseRet))
+ if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator())
return false;
// Okay, we found a branch that is going to two return nodes. If
@@ -1386,7 +1404,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// must be at the front of the block.
BasicBlock::iterator FrontIt = BB->front();
// Ignore dbg intrinsics.
- while(isa<DbgInfoIntrinsic>(FrontIt))
+ while (isa<DbgInfoIntrinsic>(FrontIt))
++FrontIt;
// Allow a single instruction to be hoisted in addition to the compare
@@ -1470,7 +1488,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
UsedValues.erase(Pair.first);
if (UsedValues.empty()) break;
- if (Instruction* I = dyn_cast<Instruction>(Pair.first)) {
+ if (Instruction *I = dyn_cast<Instruction>(Pair.first)) {
for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
@@ -1498,9 +1516,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// If we need to invert the condition in the pred block to match, do so now.
if (InvertPredCond) {
- Value *NewCond =
- BinaryOperator::CreateNot(PBI->getCondition(),
+ Value *NewCond = PBI->getCondition();
+
+ if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
+ CmpInst *CI = cast<CmpInst>(NewCond);
+ CI->setPredicate(CI->getInversePredicate());
+ } else {
+ NewCond = BinaryOperator::CreateNot(NewCond,
PBI->getCondition()->getName()+".not", PBI);
+ }
+
PBI->setCondition(NewCond);
BasicBlock *OldTrue = PBI->getSuccessor(0);
BasicBlock *OldFalse = PBI->getSuccessor(1);
@@ -1686,17 +1711,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// OtherDest may have phi nodes. If so, add an entry from PBI's
// block that are identical to the entries for BI's block.
- PHINode *PN;
- for (BasicBlock::iterator II = OtherDest->begin();
- (PN = dyn_cast<PHINode>(II)); ++II) {
- Value *V = PN->getIncomingValueForBlock(BB);
- PN->addIncoming(V, PBI->getParent());
- }
+ AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
// We know that the CommonDest already had an edge from PBI to
// it. If it has PHIs though, the PHIs may have different
// entries for BB and PBI's BB. If so, insert a select to make
// them agree.
+ PHINode *PN;
for (BasicBlock::iterator II = CommonDest->begin();
(PN = dyn_cast<PHINode>(II)); ++II) {
Value *BIV = PN->getIncomingValueForBlock(BB);
@@ -1718,481 +1739,789 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
return true;
}
-bool SimplifyCFGOpt::run(BasicBlock *BB) {
- bool Changed = false;
- Function *M = BB->getParent();
-
- assert(BB && BB->getParent() && "Block not embedded in function!");
- assert(BB->getTerminator() && "Degenerate basic block encountered!");
+// SimplifyTerminatorOnSelect - Simplifies a terminator by replacing it with a
+// branch to TrueBB if Cond is true or to FalseBB if Cond is false.
+// Takes care of updating the successors and removing the old terminator.
+// Also makes sure not to introduce new successors by assuming that edges to
+// non-successor TrueBBs and FalseBBs aren't reachable.
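+//
+// For example (a sketch): when both TrueBB and FalseBB are live successors of
+// OldTerm, the old terminator is simply replaced with
+//   br i1 %Cond, label %TrueBB, label %FalseBB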
+static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
+                                       BasicBlock *TrueBB, BasicBlock *FalseBB) {
+ // Remove any superfluous successor edges from the CFG.
+ // First, figure out which successors to preserve.
+ // If TrueBB and FalseBB are equal, only try to preserve one copy of that
+ // successor.
+ BasicBlock *KeepEdge1 = TrueBB;
+ BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0;
+
+ // Then remove the rest.
+ for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) {
+ BasicBlock *Succ = OldTerm->getSuccessor(I);
+ // Make sure only to keep exactly one copy of each edge.
+ if (Succ == KeepEdge1)
+ KeepEdge1 = 0;
+ else if (Succ == KeepEdge2)
+ KeepEdge2 = 0;
+ else
+ Succ->removePredecessor(OldTerm->getParent());
+ }
- // Remove basic blocks that have no predecessors (except the entry block)...
- // or that just have themself as a predecessor. These are unreachable.
- if ((pred_begin(BB) == pred_end(BB) &&
- &BB->getParent()->getEntryBlock() != BB) ||
- BB->getSinglePredecessor() == BB) {
- DEBUG(dbgs() << "Removing BB: \n" << *BB);
- DeleteDeadBlock(BB);
- return true;
+ // Insert an appropriate new terminator.
+ if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) {
+ if (TrueBB == FalseBB)
+ // We were only looking for one successor, and it was present.
+ // Create an unconditional branch to it.
+ BranchInst::Create(TrueBB, OldTerm);
+ else
+ // We found both of the successors we were looking for.
+ // Create a conditional branch sharing the condition of the select.
+ BranchInst::Create(TrueBB, FalseBB, Cond, OldTerm);
+ } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
+ // Neither of the selected blocks were successors, so this
+ // terminator must be unreachable.
+ new UnreachableInst(OldTerm->getContext(), OldTerm);
+ } else {
+ // One of the selected values was a successor, but the other wasn't.
+ // Insert an unconditional branch to the one that was found;
+ // the edge to the one that wasn't must be unreachable.
+ if (KeepEdge1 == 0)
+ // Only TrueBB was found.
+ BranchInst::Create(TrueBB, OldTerm);
+ else
+ // Only FalseBB was found.
+ BranchInst::Create(FalseBB, OldTerm);
}
- // Check to see if we can constant propagate this terminator instruction
- // away...
- Changed |= ConstantFoldTerminator(BB);
+ EraseTerminatorInstAndDCECond(OldTerm);
+ return true;
+}
- // Check for and eliminate duplicate PHI nodes in this block.
- Changed |= EliminateDuplicatePHINodes(BB);
+// SimplifyIndirectBrOnSelect - Replaces
+// (indirectbr (select cond, blockaddress(@fn, BlockA),
+// blockaddress(@fn, BlockB)))
+// with
+// (br cond, BlockA, BlockB).
+static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
+ // Check that both operands of the select are block addresses.
+ BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
+ BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
+ if (!TBA || !FBA)
+ return false;
- // If there is a trivial two-entry PHI node in this basic block, and we can
- // eliminate it, do so now.
- if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
- if (PN->getNumIncomingValues() == 2)
- Changed |= FoldTwoEntryPHINode(PN);
+ // Extract the actual blocks.
+ BasicBlock *TrueBB = TBA->getBasicBlock();
+ BasicBlock *FalseBB = FBA->getBasicBlock();
- // If this is a returning block with only PHI nodes in it, fold the return
- // instruction into any unconditional branch predecessors.
- //
- // If any predecessor is a conditional branch that just selects among
- // different return values, fold the replace the branch/return with a select
- // and return.
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
- if (isTerminatorFirstRelevantInsn(BB, BB->getTerminator())) {
- // Find predecessors that end with branches.
- SmallVector<BasicBlock*, 8> UncondBranchPreds;
- SmallVector<BranchInst*, 8> CondBranchPreds;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
- TerminatorInst *PTI = P->getTerminator();
- if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
- if (BI->isUnconditional())
- UncondBranchPreds.push_back(P);
- else
- CondBranchPreds.push_back(BI);
- }
- }
+ // Perform the actual simplification.
+ return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB);
+}
- // If we found some, do the transformation!
- if (!UncondBranchPreds.empty()) {
- while (!UncondBranchPreds.empty()) {
- BasicBlock *Pred = UncondBranchPreds.pop_back_val();
- DEBUG(dbgs() << "FOLDING: " << *BB
- << "INTO UNCOND BRANCH PRED: " << *Pred);
- Instruction *UncondBranch = Pred->getTerminator();
- // Clone the return and add it to the end of the predecessor.
- Instruction *NewRet = RI->clone();
- Pred->getInstList().push_back(NewRet);
-
- // If the return instruction returns a value, and if the value was a
- // PHI node in "BB", propagate the right value into the return.
- for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
- i != e; ++i)
- if (PHINode *PN = dyn_cast<PHINode>(*i))
- if (PN->getParent() == BB)
- *i = PN->getIncomingValueForBlock(Pred);
-
- // Update any PHI nodes in the returning block to realize that we no
- // longer branch to them.
- BB->removePredecessor(Pred);
- Pred->getInstList().erase(UncondBranch);
- }
+/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp
+/// instruction (a seteq/setne with a constant) as the only instruction in a
+/// block that ends with an uncond branch. We are looking for a very specific
+/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
+/// this case, we merge the first two "or's of icmp" into a switch, but the
+/// default destination is an uncond block with a seteq in it, so we get
+/// something like:
+///
+/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
+/// DEFAULT:
+/// %tmp = icmp eq i8 %A, 92
+/// br label %end
+/// end:
+/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
+///
+/// We prefer to split the edge to 'end' so that there is a true/false entry to
+/// the PHI, merging the third icmp into the switch.
+static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
+ const TargetData *TD) {
+ BasicBlock *BB = ICI->getParent();
+ // If the block has any PHIs in it or the icmp has multiple uses, it is too
+ // complex.
+ if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false;
+
+ Value *V = ICI->getOperand(0);
+ ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
+
+ // The pattern we're looking for is where our only predecessor is a switch on
+ // 'V' and this block is the default case for the switch. In this case we can
+ // fold the compared value into the switch to simplify things.
+ BasicBlock *Pred = BB->getSinglePredecessor();
+ if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false;
+
+ SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
+ if (SI->getCondition() != V)
+ return false;
+
+ // If BB is reachable on a non-default case, then we simply know the value of
+ // V in this block. Substitute it and constant fold the icmp instruction
+ // away.
+ if (SI->getDefaultDest() != BB) {
+ ConstantInt *VVal = SI->findCaseDest(BB);
+ assert(VVal && "Should have a unique destination value");
+ ICI->setOperand(0, VVal);
+
+ if (Value *V = SimplifyInstruction(ICI, TD)) {
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ }
+ // BB is now empty, so it is likely to simplify away.
+ return SimplifyCFG(BB) | true;
+ }
+
+ // Ok, the block is reachable from the default dest. If the constant we're
+ // comparing exists in one of the other edges, then we can constant fold ICI
+ // and zap it.
+ if (SI->findCaseValue(Cst) != 0) {
+ Value *V;
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ V = ConstantInt::getFalse(BB->getContext());
+ else
+ V = ConstantInt::getTrue(BB->getContext());
+
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ // BB is now empty, so it is likely to simplify away.
+ return SimplifyCFG(BB) | true;
+ }
+
+ // The use of the icmp has to be in the 'end' block, by the only PHI node in
+ // the block.
+ BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
+ PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back());
+ if (PHIUse == 0 || PHIUse != &SuccBlock->front() ||
+ isa<PHINode>(++BasicBlock::iterator(PHIUse)))
+ return false;
- // If we eliminated all predecessors of the block, delete the block now.
- if (pred_begin(BB) == pred_end(BB))
- // We know there are no successors, so just nuke the block.
- M->getBasicBlockList().erase(BB);
+ // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
+ // true in the PHI.
+ Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
+ Constant *NewCst = ConstantInt::getFalse(BB->getContext());
- return true;
- }
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(DefaultCst, NewCst);
- // Check out all of the conditional branches going to this return
- // instruction. If any of them just select between returns, change the
- // branch itself into a select/return pair.
- while (!CondBranchPreds.empty()) {
- BranchInst *BI = CondBranchPreds.pop_back_val();
-
- // Check to see if the non-BB successor is also a return block.
- if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
- isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
- SimplifyCondBranchToTwoReturns(BI))
- return true;
- }
- }
- } else if (isa<UnwindInst>(BB->begin())) {
- // Check to see if the first instruction in this block is just an unwind.
- // If so, replace any invoke instructions which use this as an exception
- // destination with call instructions.
- //
- SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
- while (!Preds.empty()) {
- BasicBlock *Pred = Preds.back();
- if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
- if (II->getUnwindDest() == BB) {
- // Insert a new branch instruction before the invoke, because this
- // is now a fall through.
- BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
- Pred->getInstList().remove(II); // Take out of symbol table
-
- // Insert the call now.
- SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
- CallInst *CI = CallInst::Create(II->getCalledValue(),
- Args.begin(), Args.end(),
- II->getName(), BI);
- CI->setCallingConv(II->getCallingConv());
- CI->setAttributes(II->getAttributes());
- // If the invoke produced a value, the Call now does instead.
- II->replaceAllUsesWith(CI);
- delete II;
- Changed = true;
- }
+ // Replace ICI (which is used by the PHI for the default value) with true or
+ // false depending on if it is EQ or NE.
+ ICI->replaceAllUsesWith(DefaultCst);
+ ICI->eraseFromParent();
- Preds.pop_back();
- }
+ // Okay, the switch goes to this block on a default value. Add an edge from
+ // the switch to the merge point on the compared value.
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge",
+ BB->getParent(), BB);
+ SI->addCase(Cst, NewBB);
+
+ // NewBB branches to the phi block, add the uncond branch and the phi entry.
+ BranchInst::Create(SuccBlock, NewBB);
+ PHIUse->addIncoming(NewCst, NewBB);
+ return true;
+}
- // If this block is now dead, remove it.
- if (pred_begin(BB) == pred_end(BB)) {
- // We know there are no successors, so just nuke the block.
- M->getBasicBlockList().erase(BB);
- return true;
- }
+/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
+/// Check to see if it is branching on an or/and chain of icmp instructions, and
+/// fold it into a switch instruction if so.
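+///
+/// For example (a sketch of the intended transform):
+///   %a = icmp eq i32 %x, 1
+///   %b = icmp eq i32 %x, 2
+///   %c = or i1 %a, %b
+///   br i1 %c, label %T, label %F
+/// becomes:
+///   switch i32 %x, label %F [ i32 1, label %T
+///                             i32 2, label %T ]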
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD) {
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+ if (Cond == 0) return false;
+
+ // Change br (X == 0 | X == 1), T, F into a switch instruction.
+ // If this is a bunch of seteq's or'd together, or if it's a bunch of
+ // 'setne's and'ed together, collect them.
+ Value *CompVal = 0;
+ std::vector<ConstantInt*> Values;
+ bool TrueWhenEqual = true;
+ Value *ExtraCase = 0;
+ unsigned UsedICmps = 0;
+
+ if (Cond->getOpcode() == Instruction::Or) {
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
+ UsedICmps);
+ } else if (Cond->getOpcode() == Instruction::And) {
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false,
+ UsedICmps);
+ TrueWhenEqual = false;
+ }
+
+  // If we didn't find a value that is compared multiple times, fail.
+ if (CompVal == 0) return false;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- if (isValueEqualityComparison(SI)) {
- // If we only have one predecessor, and if it is a branch on this value,
- // see if that predecessor totally determines the outcome of this switch.
- if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
- if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred))
- return SimplifyCFG(BB) || 1;
-
- // If the block only contains the switch, see if we can fold the block
- // away into any preds.
- BasicBlock::iterator BBI = BB->begin();
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(BBI))
- ++BBI;
- if (SI == &*BBI)
- if (FoldValueComparisonIntoPredecessors(SI))
- return SimplifyCFG(BB) || 1;
- }
- } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- if (BI->isUnconditional()) {
- BasicBlock::iterator BBI = BB->getFirstNonPHI();
+ // Avoid turning single icmps into a switch.
+ if (UsedICmps <= 1)
+ return false;
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(BBI))
- ++BBI;
- if (BBI->isTerminator()) // Terminator is the only non-phi instruction!
- if (BB != &BB->getParent()->getEntryBlock())
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
- return true;
+ // There might be duplicate constants in the list, which the switch
+  // instruction can't handle; remove them now.
+ array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
+ Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+
+ // If Extra was used, we require at least two switch values to do the
+  // transformation.  A switch with one value is just a conditional branch.
+ if (ExtraCase && Values.size() < 2) return false;
+
+ // Figure out which block is which destination.
+ BasicBlock *DefaultBB = BI->getSuccessor(1);
+ BasicBlock *EdgeBB = BI->getSuccessor(0);
+ if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
+
+ BasicBlock *BB = BI->getParent();
+
+ DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
+ << " cases into SWITCH. BB is:\n" << *BB);
+
+ // If there are any extra values that couldn't be folded into the switch
+ // then we evaluate them with an explicit branch first. Split the block
+ // right before the condbr to handle it.
+ if (ExtraCase) {
+ BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test");
+ // Remove the uncond branch added to the old block.
+ TerminatorInst *OldTI = BB->getTerminator();
+
+ if (TrueWhenEqual)
+ BranchInst::Create(EdgeBB, NewBB, ExtraCase, OldTI);
+ else
+ BranchInst::Create(NewBB, EdgeBB, ExtraCase, OldTI);
- } else { // Conditional branch
- if (isValueEqualityComparison(BI)) {
- // If we only have one predecessor, and if it is a branch on this value,
- // see if that predecessor totally determines the outcome of this
- // switch.
- if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
- if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred))
- return SimplifyCFG(BB) | true;
-
- // This block must be empty, except for the setcond inst, if it exists.
- // Ignore dbg intrinsics.
- BasicBlock::iterator I = BB->begin();
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(I))
- ++I;
- if (&*I == BI) {
- if (FoldValueComparisonIntoPredecessors(BI))
- return SimplifyCFG(BB) | true;
- } else if (&*I == cast<Instruction>(BI->getCondition())){
- ++I;
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(I))
- ++I;
- if(&*I == BI) {
- if (FoldValueComparisonIntoPredecessors(BI))
- return SimplifyCFG(BB) | true;
- }
- }
- }
+ OldTI->eraseFromParent();
+
+ // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
+ // for the edge we just added.
+ AddPredecessorToBlock(EdgeBB, BB, NewBB);
+
+ DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
+ << "\nEXTRABB = " << *BB);
+ BB = NewBB;
+ }
+
+ // Convert pointer to int before we switch.
+ if (CompVal->getType()->isPointerTy()) {
+ assert(TD && "Cannot switch on pointer without TargetData");
+ CompVal = new PtrToIntInst(CompVal,
+ TD->getIntPtrType(CompVal->getContext()),
+ "magicptr", BI);
+ }
+
+ // Create the new switch instruction now.
+ SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB, Values.size(), BI);
+
+ // Add all of the 'cases' to the switch instruction.
+ for (unsigned i = 0, e = Values.size(); i != e; ++i)
+ New->addCase(Values[i], EdgeBB);
+
+  // We added edges from BB to the EdgeBB.  As such, if there were any
+ // PHI nodes in EdgeBB, they need entries to be added corresponding to
+ // the number of edges added.
+ for (BasicBlock::iterator BBI = EdgeBB->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ Value *InVal = PN->getIncomingValueForBlock(BB);
+ for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
+ PN->addIncoming(InVal, BB);
+ }
+
+ // Erase the old branch instruction.
+ EraseTerminatorInstAndDCECond(BI);
+
+ DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
+ return true;
+}
- // If this is a branch on a phi node in the current block, thread control
- // through this block if any PHI node entries are constants.
- if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
- if (PN->getParent() == BI->getParent())
- if (FoldCondBranchOnPHI(BI))
- return SimplifyCFG(BB) | true;
-
- // If this basic block is ONLY a setcc and a branch, and if a predecessor
- // branches to us and one of our successors, fold the setcc into the
- // predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI))
- return SimplifyCFG(BB) | true;
+bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI) {
+ BasicBlock *BB = RI->getParent();
+ if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
+
+ // Find predecessors that end with branches.
+ SmallVector<BasicBlock*, 8> UncondBranchPreds;
+ SmallVector<BranchInst*, 8> CondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ TerminatorInst *PTI = P->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(P);
+ else
+ CondBranchPreds.push_back(BI);
+ }
+ }
+
+ // If we found some, do the transformation!
+ if (!UncondBranchPreds.empty() && DupRet) {
+ while (!UncondBranchPreds.empty()) {
+ BasicBlock *Pred = UncondBranchPreds.pop_back_val();
+ DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
+ (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (pred_begin(BB) == pred_end(BB))
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+
+ return true;
+ }
+
+ // Check out all of the conditional branches going to this return
+ // instruction. If any of them just select between returns, change the
+ // branch itself into a select/return pair.
+ while (!CondBranchPreds.empty()) {
+ BranchInst *BI = CondBranchPreds.pop_back_val();
+
+ // Check to see if the non-BB successor is also a return block.
+ if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
+ isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
+ SimplifyCondBranchToTwoReturns(BI))
+ return true;
+ }
+ return false;
+}
+bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI) {
+ // Check to see if the first instruction in this block is just an unwind.
+ // If so, replace any invoke instructions which use this as an exception
+ // destination with call instructions.
+ BasicBlock *BB = UI->getParent();
+ if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
- // Scan predecessor blocks for conditional branches.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
- if (PBI != BI && PBI->isConditional())
- if (SimplifyCondBranchToCondBranch(PBI, BI))
- return SimplifyCFG(BB) | true;
- }
- } else if (isa<UnreachableInst>(BB->getTerminator())) {
- // If there are any instructions immediately before the unreachable that can
- // be removed, do so.
- Instruction *Unreachable = BB->getTerminator();
- while (Unreachable != BB->begin()) {
- BasicBlock::iterator BBI = Unreachable;
- --BBI;
- // Do not delete instructions that can have side effects, like calls
- // (which may never return) and volatile loads and stores.
- if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
-
- if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
- if (SI->isVolatile())
- break;
-
- if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
- if (LI->isVolatile())
- break;
-
- // Delete this instruction
- BB->getInstList().erase(BBI);
+ bool Changed = false;
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.back();
+ InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator());
+ if (II && II->getUnwindDest() == BB) {
+ // Insert a new branch instruction before the invoke, because this
+ // is now a fall through.
+ BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
+ Pred->getInstList().remove(II); // Take out of symbol table
+
+ // Insert the call now.
+ SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
+ CallInst *CI = CallInst::Create(II->getCalledValue(),
+ Args.begin(), Args.end(),
+ II->getName(), BI);
+ CI->setCallingConv(II->getCallingConv());
+ CI->setAttributes(II->getAttributes());
+ // If the invoke produced a value, the Call now does instead.
+ II->replaceAllUsesWith(CI);
+ delete II;
Changed = true;
}
+
+ Preds.pop_back();
+ }
+
+ // If this block is now dead (and isn't the entry block), remove it.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+ return true;
+ }
+
+ return Changed;
+}
- // If the unreachable instruction is the first in the block, take a gander
- // at all of the predecessors of this instruction, and simplify them.
- if (&BB->front() == Unreachable) {
- SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- TerminatorInst *TI = Preds[i]->getTerminator();
-
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- if (BI->isUnconditional()) {
- if (BI->getSuccessor(0) == BB) {
- new UnreachableInst(TI->getContext(), TI);
- TI->eraseFromParent();
- Changed = true;
- }
- } else {
- if (BI->getSuccessor(0) == BB) {
- BranchInst::Create(BI->getSuccessor(1), BI);
- EraseTerminatorInstAndDCECond(BI);
- } else if (BI->getSuccessor(1) == BB) {
- BranchInst::Create(BI->getSuccessor(0), BI);
- EraseTerminatorInstAndDCECond(BI);
- Changed = true;
- }
+bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
+ BasicBlock *BB = UI->getParent();
+
+ bool Changed = false;
+
+ // If there are any instructions immediately before the unreachable that can
+ // be removed, do so.
+ while (UI != BB->begin()) {
+ BasicBlock::iterator BBI = UI;
+ --BBI;
+ // Do not delete instructions that can have side effects, like calls
+ // (which may never return) and volatile loads and stores.
+ if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+ if (SI->isVolatile())
+ break;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
+ if (LI->isVolatile())
+ break;
+
+ // Delete this instruction
+ BBI->eraseFromParent();
+ Changed = true;
+ }
+
+ // If the unreachable instruction is the first in the block, take a gander
+ // at all of the predecessors of this instruction, and simplify them.
+ if (&BB->front() != UI) return Changed;
+
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ TerminatorInst *TI = Preds[i]->getTerminator();
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional()) {
+ if (BI->getSuccessor(0) == BB) {
+ new UnreachableInst(TI->getContext(), TI);
+ TI->eraseFromParent();
+ Changed = true;
+ }
+ } else {
+ if (BI->getSuccessor(0) == BB) {
+ BranchInst::Create(BI->getSuccessor(1), BI);
+ EraseTerminatorInstAndDCECond(BI);
+ } else if (BI->getSuccessor(1) == BB) {
+ BranchInst::Create(BI->getSuccessor(0), BI);
+ EraseTerminatorInstAndDCECond(BI);
+ Changed = true;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ if (SI->getSuccessor(i) == BB) {
+ BB->removePredecessor(SI->getParent());
+ SI->removeCase(i);
+ --i; --e;
+ Changed = true;
+ }
+      // If the default destination is unreachable, figure out the most popular
+ // destination and make it the default.
+ if (SI->getSuccessor(0) == BB) {
+ std::map<BasicBlock*, unsigned> Popularity;
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ Popularity[SI->getSuccessor(i)]++;
+
+ // Find the most popular block.
+ unsigned MaxPop = 0;
+ BasicBlock *MaxBlock = 0;
+ for (std::map<BasicBlock*, unsigned>::iterator
+ I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
+ if (I->second > MaxPop) {
+ MaxPop = I->second;
+ MaxBlock = I->first;
}
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ }
+ if (MaxBlock) {
+ // Make this the new default, allowing us to delete any explicit
+ // edges to it.
+ SI->setSuccessor(0, MaxBlock);
+ Changed = true;
+
+ // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
+ // it.
+ if (isa<PHINode>(MaxBlock->begin()))
+ for (unsigned i = 0; i != MaxPop-1; ++i)
+ MaxBlock->removePredecessor(SI->getParent());
+
for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
- if (SI->getSuccessor(i) == BB) {
- BB->removePredecessor(SI->getParent());
+ if (SI->getSuccessor(i) == MaxBlock) {
SI->removeCase(i);
--i; --e;
- Changed = true;
- }
- // If the default value is unreachable, figure out the most popular
- // destination and make it the default.
- if (SI->getSuccessor(0) == BB) {
- std::map<BasicBlock*, unsigned> Popularity;
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
- Popularity[SI->getSuccessor(i)]++;
-
- // Find the most popular block.
- unsigned MaxPop = 0;
- BasicBlock *MaxBlock = 0;
- for (std::map<BasicBlock*, unsigned>::iterator
- I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
- if (I->second > MaxPop) {
- MaxPop = I->second;
- MaxBlock = I->first;
- }
- }
- if (MaxBlock) {
- // Make this the new default, allowing us to delete any explicit
- // edges to it.
- SI->setSuccessor(0, MaxBlock);
- Changed = true;
-
- // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
- // it.
- if (isa<PHINode>(MaxBlock->begin()))
- for (unsigned i = 0; i != MaxPop-1; ++i)
- MaxBlock->removePredecessor(SI->getParent());
-
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
- if (SI->getSuccessor(i) == MaxBlock) {
- SI->removeCase(i);
- --i; --e;
- }
}
- }
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
- if (II->getUnwindDest() == BB) {
- // Convert the invoke to a call instruction. This would be a good
- // place to note that the call does not throw though.
- BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
- II->removeFromParent(); // Take out of symbol table
-
- // Insert the call now...
- SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
- CallInst *CI = CallInst::Create(II->getCalledValue(),
- Args.begin(), Args.end(),
- II->getName(), BI);
- CI->setCallingConv(II->getCallingConv());
- CI->setAttributes(II->getAttributes());
- // If the invoke produced a value, the call does now instead.
- II->replaceAllUsesWith(CI);
- delete II;
- Changed = true;
- }
}
}
-
- // If this block is now dead, remove it.
- if (pred_begin(BB) == pred_end(BB) &&
- BB != &BB->getParent()->getEntryBlock()) {
- // We know there are no successors, so just nuke the block.
- M->getBasicBlockList().erase(BB);
- return true;
- }
- }
- } else if (IndirectBrInst *IBI =
- dyn_cast<IndirectBrInst>(BB->getTerminator())) {
- // Eliminate redundant destinations.
- SmallPtrSet<Value *, 8> Succs;
- for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
- BasicBlock *Dest = IBI->getDestination(i);
- if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
- Dest->removePredecessor(BB);
- IBI->removeDestination(i);
- --i; --e;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
+ if (II->getUnwindDest() == BB) {
+ // Convert the invoke to a call instruction. This would be a good
+ // place to note that the call does not throw though.
+ BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
+ II->removeFromParent(); // Take out of symbol table
+
+ // Insert the call now...
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
+ CallInst *CI = CallInst::Create(II->getCalledValue(),
+ Args.begin(), Args.end(),
+ II->getName(), BI);
+ CI->setCallingConv(II->getCallingConv());
+ CI->setAttributes(II->getAttributes());
+ // If the invoke produced a value, the call does now instead.
+ II->replaceAllUsesWith(CI);
+ delete II;
Changed = true;
}
- }
+ }
+ }
+
+ // If this block is now dead, remove it.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+ return true;
+ }
- if (IBI->getNumDestinations() == 0) {
- // If the indirectbr has no successors, change it to unreachable.
- new UnreachableInst(IBI->getContext(), IBI);
- IBI->eraseFromParent();
- Changed = true;
- } else if (IBI->getNumDestinations() == 1) {
- // If the indirectbr has one successor, change it to a direct branch.
- BranchInst::Create(IBI->getDestination(0), IBI);
- IBI->eraseFromParent();
+ return Changed;
+}
+
+/// TurnSwitchRangeIntoICmp - Turns a switch whose cases form a contiguous
+/// integer range with a single destination into a sub, an icmp and a branch.
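+///
+/// For example (a sketch), a switch whose cases 1, 2 and 3 all branch to %bb:
+///   switch i32 %x, label %default [ i32 1, label %bb
+///                                   i32 2, label %bb
+///                                   i32 3, label %bb ]
+/// becomes:
+///   %off = add i32 %x, -1
+///   %switch = icmp ult i32 %off, 3
+///   br i1 %switch, label %bb, label %default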
+static bool TurnSwitchRangeIntoICmp(SwitchInst *SI) {
+ assert(SI->getNumCases() > 2 && "Degenerate switch?");
+
+ // Make sure all cases point to the same destination and gather the values.
+ SmallVector<ConstantInt *, 16> Cases;
+ Cases.push_back(SI->getCaseValue(1));
+ for (unsigned I = 2, E = SI->getNumCases(); I != E; ++I) {
+ if (SI->getSuccessor(I-1) != SI->getSuccessor(I))
+ return false;
+ Cases.push_back(SI->getCaseValue(I));
+ }
+ assert(Cases.size() == SI->getNumCases()-1 && "Not all cases gathered");
+
+ // Sort the case values, then check if they form a range we can transform.
+ array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
+ for (unsigned I = 1, E = Cases.size(); I != E; ++I) {
+ if (Cases[I-1]->getValue() != Cases[I]->getValue()+1)
+ return false;
+ }
+
+ Constant *Offset = ConstantExpr::getNeg(Cases.back());
+ Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()-1);
+
+ Value *Sub = SI->getCondition();
+ if (!Offset->isNullValue())
+ Sub = BinaryOperator::CreateAdd(Sub, Offset, Sub->getName()+".off", SI);
+ Value *Cmp = new ICmpInst(SI, ICmpInst::ICMP_ULT, Sub, NumCases, "switch");
+ BranchInst::Create(SI->getSuccessor(1), SI->getDefaultDest(), Cmp, SI);
+
+ // Prune obsolete incoming values off the successor's PHI nodes.
+ for (BasicBlock::iterator BBI = SI->getSuccessor(1)->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ for (unsigned I = 0, E = SI->getNumCases()-2; I != E; ++I)
+ cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ }
+ SI->eraseFromParent();
+
+ return true;
+}
+
+bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI) {
+ // If this switch is too complex to want to look at, ignore it.
+ if (!isValueEqualityComparison(SI))
+ return false;
+
+ BasicBlock *BB = SI->getParent();
+
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred))
+ return SimplifyCFG(BB) | true;
+
+ // If the block only contains the switch, see if we can fold the block
+ // away into any preds.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (SI == &*BBI)
+ if (FoldValueComparisonIntoPredecessors(SI))
+ return SimplifyCFG(BB) | true;
+
+ // Try to transform the switch into an icmp and a branch.
+ if (TurnSwitchRangeIntoICmp(SI))
+ return SimplifyCFG(BB) | true;
+
+ return false;
+}
+
+bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
+ BasicBlock *BB = IBI->getParent();
+ bool Changed = false;
+
+ // Eliminate redundant destinations.
+ SmallPtrSet<Value *, 8> Succs;
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ BasicBlock *Dest = IBI->getDestination(i);
+ if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
+ Dest->removePredecessor(BB);
+ IBI->removeDestination(i);
+ --i; --e;
Changed = true;
}
+ }
+
+ if (IBI->getNumDestinations() == 0) {
+ // If the indirectbr has no successors, change it to unreachable.
+ new UnreachableInst(IBI->getContext(), IBI);
+ EraseTerminatorInstAndDCECond(IBI);
+ return true;
+ }
+
+ if (IBI->getNumDestinations() == 1) {
+ // If the indirectbr has one successor, change it to a direct branch.
+ BranchInst::Create(IBI->getDestination(0), IBI);
+ EraseTerminatorInstAndDCECond(IBI);
+ return true;
}
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
+ if (SimplifyIndirectBrOnSelect(IBI, SI))
+ return SimplifyCFG(BB) | true;
+ }
+ return Changed;
+}
- // Merge basic blocks into their predecessor if there is only one distinct
- // pred, and if there is only one distinct successor of the predecessor, and
- // if there are no PHI nodes.
- //
- if (MergeBlockIntoPredecessor(BB))
+bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI) {
+ BasicBlock *BB = BI->getParent();
+
+ // If the Terminator is the only non-phi instruction, simplify the block.
+ BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
+ if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
+ TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
+
+ // If the only instruction in the block is a seteq/setne comparison
+ // against a constant, try to simplify the block.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+ if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I)
+ ;
+ if (I->isTerminator() && TryToSimplifyUncondBranchWithICmpInIt(ICI, TD))
+ return true;
+ }
+
+ return false;
+}
- // Otherwise, if this block only has a single predecessor, and if that block
- // is a conditional branch, see if we can hoist any code from this block up
- // into our predecessor.
- pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
- BasicBlock *OnlyPred = 0;
- for (; PI != PE; ++PI) { // Search all predecessors, see if they are all same
- if (!OnlyPred)
- OnlyPred = *PI;
- else if (*PI != OnlyPred) {
- OnlyPred = 0; // There are multiple different predecessors...
- break;
+
+bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI) {
+ BasicBlock *BB = BI->getParent();
+
+ // Conditional branch
+ if (isValueEqualityComparison(BI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this
+ // switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred))
+ return SimplifyCFG(BB) | true;
+
+ // This block must be empty, except for the setcond inst, if it exists.
+ BasicBlock::iterator I = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI) {
+ if (FoldValueComparisonIntoPredecessors(BI))
+ return SimplifyCFG(BB) | true;
+    } else if (&*I == cast<Instruction>(BI->getCondition())) {
+ ++I;
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI && FoldValueComparisonIntoPredecessors(BI))
+ return SimplifyCFG(BB) | true;
}
}
- if (OnlyPred)
- if (BranchInst *BI = dyn_cast<BranchInst>(OnlyPred->getTerminator()))
- if (BI->isConditional()) {
- // Get the other block.
- BasicBlock *OtherBB = BI->getSuccessor(BI->getSuccessor(0) == BB);
- PI = pred_begin(OtherBB);
- ++PI;
-
- if (PI == pred_end(OtherBB)) {
- // We have a conditional branch to two blocks that are only reachable
- // from the condbr. We know that the condbr dominates the two blocks,
- // so see if there is any identical code in the "then" and "else"
- // blocks. If so, we can hoist it up to the branching block.
- Changed |= HoistThenElseCodeToIf(BI);
- } else {
- BasicBlock* OnlySucc = NULL;
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
- SI != SE; ++SI) {
- if (!OnlySucc)
- OnlySucc = *SI;
- else if (*SI != OnlySucc) {
- OnlySucc = 0; // There are multiple distinct successors!
- break;
- }
- }
+ // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
+ if (SimplifyBranchOnICmpChain(BI, TD))
+ return true;
+
+ // We have a conditional branch to two blocks that are only reachable
+ // from BI. We know that the condbr dominates the two blocks, so see if
+ // there is any identical code in the "then" and "else" blocks. If so, we
+ // can hoist it up to the branching block.
+ if (BI->getSuccessor(0)->getSinglePredecessor() != 0) {
+ if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+ if (HoistThenElseCodeToIf(BI))
+ return SimplifyCFG(BB) | true;
+ } else {
+ // If Successor #1 has multiple preds, we may be able to conditionally
+ // execute Successor #0 if it branches to successor #1.
+ TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
+ if (Succ0TI->getNumSuccessors() == 1 &&
+ Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
+ return SimplifyCFG(BB) | true;
+ }
+ } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+ // If Successor #0 has multiple preds, we may be able to conditionally
+ // execute Successor #1 if it branches to successor #0.
+ TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
+ if (Succ1TI->getNumSuccessors() == 1 &&
+ Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
+ return SimplifyCFG(BB) | true;
+ }
+
+ // If this is a branch on a phi node in the current block, thread control
+ // through this block if any PHI node entries are constants.
+ if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
+ if (PN->getParent() == BI->getParent())
+ if (FoldCondBranchOnPHI(BI, TD))
+ return SimplifyCFG(BB) | true;
+
+ // If this basic block is ONLY a setcc and a branch, and if a predecessor
+ // branches to us and one of our successors, fold the setcc into the
+ // predecessor and use logical operations to pick the right destination.
+ if (FoldBranchToCommonDest(BI))
+ return SimplifyCFG(BB) | true;
+
+ // Scan predecessor blocks for conditional branches.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI != BI && PBI->isConditional())
+ if (SimplifyCondBranchToCondBranch(PBI, BI))
+ return SimplifyCFG(BB) | true;
- if (OnlySucc == OtherBB) {
- // If BB's only successor is the other successor of the predecessor,
- // i.e. a triangle, see if we can hoist any code from this block up
- // to the "if" block.
- Changed |= SpeculativelyExecuteBB(BI, BB);
- }
- }
- }
+ return false;
+}
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- if (BranchInst *BI = dyn_cast<BranchInst>((*PI)->getTerminator()))
- // Change br (X == 0 | X == 1), T, F into a switch instruction.
- if (BI->isConditional() && isa<Instruction>(BI->getCondition())) {
- Instruction *Cond = cast<Instruction>(BI->getCondition());
- // If this is a bunch of seteq's or'd together, or if it's a bunch of
- // 'setne's and'ed together, collect them.
- Value *CompVal = 0;
- std::vector<ConstantInt*> Values;
- bool TrueWhenEqual = GatherValueComparisons(Cond, CompVal, Values);
- if (CompVal) {
- // There might be duplicate constants in the list, which the switch
- // instruction can't handle, remove them now.
- std::sort(Values.begin(), Values.end(), ConstantIntOrdering());
- Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
-
- // Figure out which block is which destination.
- BasicBlock *DefaultBB = BI->getSuccessor(1);
- BasicBlock *EdgeBB = BI->getSuccessor(0);
- if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
-
- // Convert pointer to int before we switch.
- if (CompVal->getType()->isPointerTy()) {
- assert(TD && "Cannot switch on pointer without TargetData");
- CompVal = new PtrToIntInst(CompVal,
- TD->getIntPtrType(CompVal->getContext()),
- "magicptr", BI);
- }
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+ bool Changed = false;
- // Create the new switch instruction now.
- SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB,
- Values.size(), BI);
-
- // Add all of the 'cases' to the switch instruction.
- for (unsigned i = 0, e = Values.size(); i != e; ++i)
- New->addCase(Values[i], EdgeBB);
-
- // We added edges from PI to the EdgeBB. As such, if there were any
- // PHI nodes in EdgeBB, they need entries to be added corresponding to
- // the number of edges added.
- for (BasicBlock::iterator BBI = EdgeBB->begin();
- isa<PHINode>(BBI); ++BBI) {
- PHINode *PN = cast<PHINode>(BBI);
- Value *InVal = PN->getIncomingValueForBlock(*PI);
- for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
- PN->addIncoming(InVal, *PI);
- }
+ assert(BB && BB->getParent() && "Block not embedded in function!");
+ assert(BB->getTerminator() && "Degenerate basic block encountered!");
- // Erase the old branch instruction.
- EraseTerminatorInstAndDCECond(BI);
- return true;
- }
- }
+ // Remove basic blocks that have no predecessors (except the entry block)...
+ // or that just have themself as a predecessor. These are unreachable.
+ if ((pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) ||
+ BB->getSinglePredecessor() == BB) {
+ DEBUG(dbgs() << "Removing BB: \n" << *BB);
+ DeleteDeadBlock(BB);
+ return true;
+ }
+
+ // Check to see if we can constant propagate this terminator instruction
+ // away...
+ Changed |= ConstantFoldTerminator(BB);
+
+ // Check for and eliminate duplicate PHI nodes in this block.
+ Changed |= EliminateDuplicatePHINodes(BB);
+
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ //
+ if (MergeBlockIntoPredecessor(BB))
+ return true;
+
+ // If there is a trivial two-entry PHI node in this basic block, and we can
+ // eliminate it, do so now.
+ if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
+ if (PN->getNumIncomingValues() == 2)
+ Changed |= FoldTwoEntryPHINode(PN, TD);
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ if (BI->isUnconditional()) {
+ if (SimplifyUncondBranch(BI)) return true;
+ } else {
+ if (SimplifyCondBranch(BI)) return true;
+ }
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ if (SimplifyReturn(RI)) return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ if (SimplifySwitch(SI)) return true;
+ } else if (UnreachableInst *UI =
+ dyn_cast<UnreachableInst>(BB->getTerminator())) {
+ if (SimplifyUnreachable(UI)) return true;
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ if (SimplifyUnwind(UI)) return true;
+ } else if (IndirectBrInst *IBI =
+ dyn_cast<IndirectBrInst>(BB->getTerminator())) {
+ if (SimplifyIndirectBr(IBI)) return true;
+ }
return Changed;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
new file mode 100644
index 0000000..ac005f9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -0,0 +1,94 @@
+//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility pass used for testing the InstructionSimplify analysis.
+// The analysis is applied to every instruction, and if it simplifies then the
+// instruction is replaced by the simplification. If you are looking for a pass
+// that performs serious instruction folding, use the instcombine pass instead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions removed");
+
+namespace {
+ struct InstSimplifier : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstSimplifier() : FunctionPass(ID) {
+ initializeInstSimplifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+
+ /// runOnFunction - Remove instructions that simplify.
+ bool runOnFunction(Function &F) {
+ const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
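+      // The two sets are double-buffered: ToSimplify holds this iteration's
+      // candidates, while Next collects the users that need revisiting on the
+      // next iteration.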
+ bool Changed = false;
+
+ do {
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI)
+ for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) {
+ Instruction *I = BI++;
+ // The first time through the loop ToSimplify is empty and we try to
+ // simplify all instructions. On later iterations ToSimplify is not
+ // empty and we only bother simplifying instructions that are in it.
+ if (!ToSimplify->empty() && !ToSimplify->count(I))
+ continue;
+ // Don't waste time simplifying unused instructions.
+ if (!I->use_empty())
+ if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ // Mark all uses for resimplification next time round the loop.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Next->insert(cast<Instruction>(*UI));
+ I->replaceAllUsesWith(V);
+ ++NumSimplified;
+ Changed = true;
+ }
+ Changed |= RecursivelyDeleteTriviallyDeadInstructions(I);
+ }
+
+ // Place the list of instructions to simplify on the next loop iteration
+ // into ToSimplify.
+ std::swap(ToSimplify, Next);
+ Next->clear();
+ } while (!ToSimplify->empty());
+
+ return Changed;
+ }
+ };
+}
+
+char InstSimplifier::ID = 0;
+INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions",
+ false, false)
+char &llvm::InstructionSimplifierID = InstSimplifier::ID;
+
+// Public interface to the simplify instructions pass.
+FunctionPass *llvm::createInstructionSimplifierPass() {
+ return new InstSimplifier();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index a51f1e1..ccb8287 100644
--- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
char UnifyFunctionExitNodes::ID = 0;
INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
- "Unify function exit nodes", false, false);
+ "Unify function exit nodes", false, false)
Pass *llvm::createUnifyFunctionExitNodesPass() {
return new UnifyFunctionExitNodes();
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
new file mode 100644
index 0000000..24e8c8f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -0,0 +1,37 @@
+//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// TransformUtils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeTransformUtils - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeTransformUtils(PassRegistry &Registry) {
+ initializeBreakCriticalEdgesPass(Registry);
+ initializeInstNamerPass(Registry);
+ initializeLCSSAPass(Registry);
+ initializeLoopSimplifyPass(Registry);
+ initializeLowerInvokePass(Registry);
+ initializeLowerSwitchPass(Registry);
+ initializePromotePassPass(Registry);
+ initializeUnifyFunctionExitNodesPass(Registry);
+ initializeInstSimplifierPass(Registry);
+}
+
+/// LLVMInitializeTransformUtils - C binding for initializeTransformUtils.
+void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) {
+ initializeTransformUtils(*unwrap(R));
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index fc4bde7..f5481d3 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -21,147 +21,111 @@
using namespace llvm;
Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
- bool ModuleLevelChanges) {
- Value *&VMSlot = VM[V];
- if (VMSlot) return VMSlot; // Does it exist in the map yet?
+ RemapFlags Flags) {
+ ValueToValueMapTy::iterator I = VM.find(V);
+
+ // If the value already exists in the map, use it.
+ if (I != VM.end() && I->second) return I->second;
- // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the
- // DenseMap. This includes any recursive calls to MapValue.
-
// Global values do not need to be seeded into the VM if they
// are using the identity mapping.
- if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) ||
- (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal() &&
- !ModuleLevelChanges))
- return VMSlot = const_cast<Value*>(V);
+ if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V))
+ return VM[V] = const_cast<Value*>(V);
if (const MDNode *MD = dyn_cast<MDNode>(V)) {
- // Start by assuming that we'll use the identity mapping.
- VMSlot = const_cast<Value*>(V);
-
+    // If this is module-level metadata and we know that nothing at the module
+ // level is changing, then use an identity mapping.
+ if (!MD->isFunctionLocal() && (Flags & RF_NoModuleLevelChanges))
+ return VM[V] = const_cast<Value*>(V);
+
+ // Create a dummy node in case we have a metadata cycle.
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0);
+ VM[V] = Dummy;
+
// Check all operands to see if any need to be remapped.
for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
Value *OP = MD->getOperand(i);
- if (!OP || MapValue(OP, VM, ModuleLevelChanges) == OP) continue;
+ if (OP == 0 || MapValue(OP, VM, Flags) == OP) continue;
- // Ok, at least one operand needs remapping.
- MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0);
- VM[V] = Dummy;
+ // Ok, at least one operand needs remapping.
SmallVector<Value*, 4> Elts;
Elts.reserve(MD->getNumOperands());
- for (i = 0; i != e; ++i)
- Elts.push_back(MD->getOperand(i) ?
- MapValue(MD->getOperand(i), VM, ModuleLevelChanges) : 0);
+ for (i = 0; i != e; ++i) {
+ Value *Op = MD->getOperand(i);
+ Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0);
+ }
MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size());
Dummy->replaceAllUsesWith(NewMD);
+ VM[V] = NewMD;
MDNode::deleteTemporary(Dummy);
- return VM[V] = NewMD;
+ return NewMD;
}
- // No operands needed remapping; keep the identity map.
+ VM[V] = const_cast<Value*>(V);
+ MDNode::deleteTemporary(Dummy);
+
+ // No operands needed remapping. Use an identity mapping.
return const_cast<Value*>(V);
}
+ // Okay, this either must be a constant (which may or may not be mappable) or
+ // is something that is not in the mapping table.
Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
if (C == 0)
return 0;
- if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
- isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
- isa<UndefValue>(C))
- return VMSlot = C; // Primitive constants map directly
-
- if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
- for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
- i != e; ++i) {
- Value *MV = MapValue(*i, VM, ModuleLevelChanges);
- if (MV != *i) {
- // This array must contain a reference to a global, make a new array
- // and return it.
- //
- std::vector<Constant*> Values;
- Values.reserve(CA->getNumOperands());
- for (User::op_iterator j = b; j != i; ++j)
- Values.push_back(cast<Constant>(*j));
- Values.push_back(cast<Constant>(MV));
- for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM,
- ModuleLevelChanges)));
- return VM[V] = ConstantArray::get(CA->getType(), Values);
- }
- }
- return VM[V] = C;
- }
-
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
- for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
- i != e; ++i) {
- Value *MV = MapValue(*i, VM, ModuleLevelChanges);
- if (MV != *i) {
- // This struct must contain a reference to a global, make a new struct
- // and return it.
- //
- std::vector<Constant*> Values;
- Values.reserve(CS->getNumOperands());
- for (User::op_iterator j = b; j != i; ++j)
- Values.push_back(cast<Constant>(*j));
- Values.push_back(cast<Constant>(MV));
- for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM,
- ModuleLevelChanges)));
- return VM[V] = ConstantStruct::get(CS->getType(), Values);
- }
- }
- return VM[V] = C;
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+ Function *F = cast<Function>(MapValue(BA->getFunction(), VM, Flags));
+ BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
+ Flags));
+ return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
}
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ Value *Op = C->getOperand(i);
+ Value *Mapped = MapValue(Op, VM, Flags);
+    if (Mapped == Op) continue;
+
+ // Okay, the operands don't all match. We've already processed some or all
+ // of the operands, set them up now.
std::vector<Constant*> Ops;
- for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
- Ops.push_back(cast<Constant>(MapValue(*i, VM, ModuleLevelChanges)));
- return VM[V] = CE->getWithOperands(Ops);
+ Ops.reserve(C->getNumOperands());
+ for (unsigned j = 0; j != i; ++j)
+      Ops.push_back(cast<Constant>(C->getOperand(j)));
+ Ops.push_back(cast<Constant>(Mapped));
+
+ // Map the rest of the operands that aren't processed yet.
+ for (++i; i != e; ++i)
+ Ops.push_back(cast<Constant>(MapValue(C->getOperand(i), VM, Flags)));
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return VM[V] = CE->getWithOperands(Ops);
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
+ return VM[V] = ConstantArray::get(CA->getType(), Ops);
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
+ return VM[V] = ConstantStruct::get(CS->getType(), Ops);
+ assert(isa<ConstantVector>(C) && "Unknown mapped constant type");
+ return VM[V] = ConstantVector::get(Ops);
}
-
- if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
- for (User::op_iterator b = CV->op_begin(), i = b, e = CV->op_end();
- i != e; ++i) {
- Value *MV = MapValue(*i, VM, ModuleLevelChanges);
- if (MV != *i) {
- // This vector value must contain a reference to a global, make a new
- // vector constant and return it.
- //
- std::vector<Constant*> Values;
- Values.reserve(CV->getNumOperands());
- for (User::op_iterator j = b; j != i; ++j)
- Values.push_back(cast<Constant>(*j));
- Values.push_back(cast<Constant>(MV));
- for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM,
- ModuleLevelChanges)));
- return VM[V] = ConstantVector::get(Values);
- }
- }
- return VM[V] = C;
- }
-
- BlockAddress *BA = cast<BlockAddress>(C);
- Function *F = cast<Function>(MapValue(BA->getFunction(), VM,
- ModuleLevelChanges));
- BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM,
- ModuleLevelChanges));
- return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
+
+ // If we reach here, all of the operands of the constant match.
+ return VM[V] = C;
}
/// RemapInstruction - Convert the instruction operands from referencing the
/// current values into those specified by VMap.
///
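/// A typical use (a sketch): after cloning a block and seeding VMap with the
/// old-to-new value mappings, each cloned instruction is fixed up with a call
/// like RemapInstruction(ClonedInst, VMap, RF_None).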
void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
- bool ModuleLevelChanges) {
+ RemapFlags Flags) {
// Remap operands.
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, VMap, ModuleLevelChanges);
- assert(V && "Referenced value not in value map!");
- *op = V;
+ Value *V = MapValue(*op, VMap, Flags);
+ // If we aren't ignoring missing entries, assert that something happened.
+ if (V != 0)
+ *op = V;
+ else
+ assert((Flags & RF_IgnoreMissingEntries) &&
+ "Referenced value not in value map!");
}
// Remap attached metadata.
@@ -170,7 +134,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
Value *Old = MI->second;
- Value *New = MapValue(Old, VMap, ModuleLevelChanges);
+ Value *New = MapValue(Old, VMap, Flags);
if (New != Old)
I->setMetadata(MI->first, cast<MDNode>(New));
}