summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms')
-rw-r--r--contrib/llvm/lib/Transforms/Hello/CMakeLists.txt3
-rw-r--r--contrib/llvm/lib/Transforms/Hello/Hello.cpp65
-rw-r--r--contrib/llvm/lib/Transforms/Hello/Hello.exports0
-rw-r--r--contrib/llvm/lib/Transforms/Hello/Makefile24
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp889
-rw-r--r--contrib/llvm/lib/Transforms/IPO/CMakeLists.txt27
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp142
-rw-r--r--contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp939
-rw-r--r--contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp106
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp82
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp391
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp209
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp2576
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp277
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPO.cpp84
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp80
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp111
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Inliner.cpp548
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Internalize.cpp190
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp241
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp545
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Makefile15
-rw-r--r--contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp652
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp180
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp216
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PruneEH.cpp252
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp71
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp400
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp352
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/CMakeLists.txt17
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombine.h352
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp731
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp1994
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp1295
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp1686
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp2447
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp639
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp695
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp844
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp704
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp702
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp1113
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp561
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h105
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp1304
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/Makefile15
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/CMakeLists.txt5
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp114
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/Makefile15
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h108
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp218
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp131
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h31
-rw-r--r--contrib/llvm/lib/Transforms/Makefile20
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ADCE.cpp95
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp147
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CMakeLists.txt35
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp981
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp89
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp200
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DCE.cpp132
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp575
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp81
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVN.cpp2319
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp1034
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp1706
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LICM.cpp917
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp233
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp1270
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp423
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp3833
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp153
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp1051
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp161
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Makefile15
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp801
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp1075
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp130
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SCCP.cpp1997
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Scalar.cpp118
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp1835
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp327
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp157
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp2201
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Sink.cpp266
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp371
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp535
-rw-r--r--contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp598
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp551
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp182
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp443
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp527
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CMakeLists.txt28
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp580
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp151
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneModule.cpp137
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp795
-rw-r--r--contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp146
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp668
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp63
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LCSSA.cpp266
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp647
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp762
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp378
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp608
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp320
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Makefile15
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp89
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp1053
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp344
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp2207
-rw-r--r--contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp141
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp177
113 files changed, 62652 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Transforms/Hello/CMakeLists.txt b/contrib/llvm/lib/Transforms/Hello/CMakeLists.txt
new file mode 100644
index 0000000..917b745
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Hello/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_loadable_module( LLVMHello
+ Hello.cpp
+ )
diff --git a/contrib/llvm/lib/Transforms/Hello/Hello.cpp b/contrib/llvm/lib/Transforms/Hello/Hello.cpp
new file mode 100644
index 0000000..838d550
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Hello/Hello.cpp
@@ -0,0 +1,65 @@
+//===- Hello.cpp - Example code from "Writing an LLVM Pass" ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements two versions of the LLVM "Hello World" pass described
+// in docs/WritingAnLLVMPass.html
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hello"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(HelloCounter, "Counts number of functions greeted");
+
+namespace {
+ // Hello - The first implementation, without getAnalysisUsage.
+ struct Hello : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ Hello() : FunctionPass(ID) {}
+
+ virtual bool runOnFunction(Function &F) {
+ ++HelloCounter;
+ errs() << "Hello: ";
+ errs().write_escaped(F.getName()) << '\n';
+ return false;
+ }
+ };
+}
+
+char Hello::ID = 0;
+INITIALIZE_PASS(Hello, "hello", "Hello World Pass", false, false);
+
+namespace {
+ // Hello2 - The second implementation with getAnalysisUsage implemented.
+ struct Hello2 : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ Hello2() : FunctionPass(ID) {}
+
+ virtual bool runOnFunction(Function &F) {
+ ++HelloCounter;
+ errs() << "Hello: ";
+ errs().write_escaped(F.getName()) << '\n';
+ return false;
+ }
+
+ // We don't modify the program, so we preserve all analyses
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char Hello2::ID = 0;
+INITIALIZE_PASS(Hello2, "hello2",
+ "Hello World Pass (with getAnalysisUsage implemented)",
+ false, false);
diff --git a/contrib/llvm/lib/Transforms/Hello/Hello.exports b/contrib/llvm/lib/Transforms/Hello/Hello.exports
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Hello/Hello.exports
diff --git a/contrib/llvm/lib/Transforms/Hello/Makefile b/contrib/llvm/lib/Transforms/Hello/Makefile
new file mode 100644
index 0000000..f1e3148
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Hello/Makefile
@@ -0,0 +1,24 @@
+##===- lib/Transforms/Hello/Makefile -----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMHello
+LOADABLE_MODULE = 1
+USEDLIBS =
+
+# If we don't need RTTI or EH, there's no reason to export anything
+# from the hello plugin.
+ifneq ($(REQUIRES_RTTI), 1)
+ifneq ($(REQUIRES_EH), 1)
+EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/Hello.exports
+endif
+endif
+
+include $(LEVEL)/Makefile.common
+
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
new file mode 100644
index 0000000..0c77e1f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -0,0 +1,889 @@
+//===-- ArgumentPromotion.cpp - Promote by-reference arguments ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass promotes "by reference" arguments to be "by value" arguments. In
+// practice, this means looking for internal functions that have pointer
+// arguments. If it can prove, through the use of alias analysis, that an
+// argument is *only* loaded, then it can pass the value into the function
+// instead of the address of the value. This can cause recursive simplification
+// of code and lead to the elimination of allocas (especially in C++ template
+// code like the STL).
+//
+// This pass also handles aggregate arguments that are passed into a function,
+// scalarizing them if the elements of the aggregate are only loaded. Note that
+// by default it refuses to scalarize aggregates which would require passing in
+// more than three operands to the function, because passing thousands of
+// operands for a large array or structure is unprofitable! This limit can be
+// configured or disabled, however.
+//
+// Note that this transformation could also be done for arguments that are only
+// stored to (returning the value instead), but does not currently. This case
+// would be best handled when and if LLVM begins supporting multiple return
+// values from functions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "argpromotion"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted");
+STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
+STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
+STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated");
+
+namespace {
+ /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
+ ///
+ struct ArgPromotion : public CallGraphSCCPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnSCC(CallGraphSCC &SCC);
+ static char ID; // Pass identification, replacement for typeid
+ explicit ArgPromotion(unsigned maxElements = 3)
+ : CallGraphSCCPass(ID), maxElements(maxElements) {}
+
+ /// A vector used to hold the indices of a single GEP instruction
+ typedef std::vector<uint64_t> IndicesVector;
+
+ private:
+ CallGraphNode *PromoteArguments(CallGraphNode *CGN);
+ bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
+ CallGraphNode *DoPromotion(Function *F,
+ SmallPtrSet<Argument*, 8> &ArgsToPromote,
+ SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
+ /// The maximum number of elements to expand, or 0 for unlimited.
+ unsigned maxElements;
+ };
+}
+
+char ArgPromotion::ID = 0;
+INITIALIZE_PASS(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false);
+
+Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
+ return new ArgPromotion(maxElements);
+}
+
+bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
+ bool Changed = false, LocalChange;
+
+ do { // Iterate until we stop promoting from this SCC.
+ LocalChange = false;
+ // Attempt to promote arguments from all functions in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ if (CallGraphNode *CGN = PromoteArguments(*I)) {
+ LocalChange = true;
+ SCC.ReplaceNode(*I, CGN);
+ }
+ }
+ Changed |= LocalChange; // Remember that we changed something.
+ } while (LocalChange);
+
+ return Changed;
+}
+
+/// PromoteArguments - This method checks the specified function to see if there
+/// are any promotable arguments and if it is safe to promote the function (for
+/// example, all callers are direct). If safe to promote some arguments, it
+/// calls the DoPromotion method.
+///
+CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
+ Function *F = CGN->getFunction();
+
+ // Make sure that it is local to this module.
+ if (!F || !F->hasLocalLinkage()) return 0;
+
+ // First check: see if there are any pointer arguments! If not, quick exit.
+ SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs;
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++ArgNo)
+ if (I->getType()->isPointerTy())
+ PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
+ if (PointerArgs.empty()) return 0;
+
+ // Second check: make sure that all callers are direct callers. We can't
+ // transform functions that have indirect callers.
+ if (F->hasAddressTaken())
+ return 0;
+
+ // Check to see which arguments are promotable. If an argument is promotable,
+ // add it to ArgsToPromote.
+ SmallPtrSet<Argument*, 8> ArgsToPromote;
+ SmallPtrSet<Argument*, 8> ByValArgsToTransform;
+ for (unsigned i = 0; i != PointerArgs.size(); ++i) {
+ bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);
+
+ // If this is a byval argument, and if the aggregate type is small, just
+ // pass the elements, which is always safe.
+ Argument *PtrArg = PointerArgs[i].first;
+ if (isByVal) {
+ const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+ if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
+ if (maxElements > 0 && STy->getNumElements() > maxElements) {
+ DEBUG(dbgs() << "argpromotion disable promoting argument '"
+ << PtrArg->getName() << "' because it would require adding more"
+ << " than " << maxElements << " arguments to the function.\n");
+ } else {
+ // If all the elements are single-value types, we can promote it.
+ bool AllSimple = true;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ if (!STy->getElementType(i)->isSingleValueType()) {
+ AllSimple = false;
+ break;
+ }
+
+ // Safe to transform, don't even bother trying to "promote" it.
+ // Passing the elements as a scalar will allow scalarrepl to hack on
+ // the new alloca we introduce.
+ if (AllSimple) {
+ ByValArgsToTransform.insert(PtrArg);
+ continue;
+ }
+ }
+ }
+ }
+
+ // Otherwise, see if we can promote the pointer to its value.
+ if (isSafeToPromoteArgument(PtrArg, isByVal))
+ ArgsToPromote.insert(PtrArg);
+ }
+
+ // No promotable pointer arguments.
+ if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+ return 0;
+
+ return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
+}
+
+/// IsAlwaysValidPointer - Return true if the specified pointer is always legal
+/// to load.
+static bool IsAlwaysValidPointer(Value *V) {
+ if (isa<AllocaInst>(V) || isa<GlobalVariable>(V)) return true;
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V))
+ return IsAlwaysValidPointer(GEP->getOperand(0));
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::GetElementPtr)
+ return IsAlwaysValidPointer(CE->getOperand(0));
+
+ return false;
+}
+
+/// AllCalleesPassInValidPointerForArgument - Return true if we can prove that
+/// all callees pass in a valid pointer for the specified function argument.
+static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) {
+ Function *Callee = Arg->getParent();
+
+ unsigned ArgNo = std::distance(Callee->arg_begin(),
+ Function::arg_iterator(Arg));
+
+ // Look at all call sites of the function. At this pointer we know we only
+ // have direct callees.
+ for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end();
+ UI != E; ++UI) {
+ CallSite CS(*UI);
+ assert(CS && "Should only have direct calls!");
+
+ if (!IsAlwaysValidPointer(CS.getArgument(ArgNo)))
+ return false;
+ }
+ return true;
+}
+
+/// Returns true if Prefix is a prefix of longer. That means, Longer has a size
+/// that is greater than or equal to the size of prefix, and each of the
+/// elements in Prefix is the same as the corresponding elements in Longer.
+///
+/// This means it also returns true when Prefix and Longer are equal!
+static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix,
+ const ArgPromotion::IndicesVector &Longer) {
+ if (Prefix.size() > Longer.size())
+ return false;
+ for (unsigned i = 0, e = Prefix.size(); i != e; ++i)
+ if (Prefix[i] != Longer[i])
+ return false;
+ return true;
+}
+
+
+/// Checks if Indices, or a prefix of Indices, is in Set.
+static bool PrefixIn(const ArgPromotion::IndicesVector &Indices,
+ std::set<ArgPromotion::IndicesVector> &Set) {
+ std::set<ArgPromotion::IndicesVector>::iterator Low;
+ Low = Set.upper_bound(Indices);
+ if (Low != Set.begin())
+ Low--;
+ // Low is now the last element smaller than or equal to Indices. This means
+ // it points to a prefix of Indices (possibly Indices itself), if such
+ // prefix exists.
+ //
+ // This load is safe if any prefix of its operands is safe to load.
+ return Low != Set.end() && IsPrefix(*Low, Indices);
+}
+
+/// Mark the given indices (ToMark) as safe in the given set of indices
+/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
+/// is already a prefix of Indices in Safe, Indices are implicitely marked safe
+/// already. Furthermore, any indices that Indices is itself a prefix of, are
+/// removed from Safe (since they are implicitely safe because of Indices now).
+static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
+ std::set<ArgPromotion::IndicesVector> &Safe) {
+ std::set<ArgPromotion::IndicesVector>::iterator Low;
+ Low = Safe.upper_bound(ToMark);
+ // Guard against the case where Safe is empty
+ if (Low != Safe.begin())
+ Low--;
+ // Low is now the last element smaller than or equal to Indices. This
+ // means it points to a prefix of Indices (possibly Indices itself), if
+ // such prefix exists.
+ if (Low != Safe.end()) {
+ if (IsPrefix(*Low, ToMark))
+ // If there is already a prefix of these indices (or exactly these
+ // indices) marked a safe, don't bother adding these indices
+ return;
+
+ // Increment Low, so we can use it as a "insert before" hint
+ ++Low;
+ }
+ // Insert
+ Low = Safe.insert(Low, ToMark);
+ ++Low;
+ // If there we're a prefix of longer index list(s), remove those
+ std::set<ArgPromotion::IndicesVector>::iterator End = Safe.end();
+ while (Low != End && IsPrefix(ToMark, *Low)) {
+ std::set<ArgPromotion::IndicesVector>::iterator Remove = Low;
+ ++Low;
+ Safe.erase(Remove);
+ }
+}
+
+/// isSafeToPromoteArgument - As you might guess from the name of this method,
+/// it checks to see if it is both safe and useful to promote the argument.
+/// This method limits promotion of aggregates to only promote up to three
+/// elements of the aggregate in order to avoid exploding the number of
+/// arguments passed in.
+bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
+ typedef std::set<IndicesVector> GEPIndicesSet;
+
+ // Quick exit for unused arguments
+ if (Arg->use_empty())
+ return true;
+
+ // We can only promote this argument if all of the uses are loads, or are GEP
+ // instructions (with constant indices) that are subsequently loaded.
+ //
+ // Promoting the argument causes it to be loaded in the caller
+ // unconditionally. This is only safe if we can prove that either the load
+ // would have happened in the callee anyway (ie, there is a load in the entry
+ // block) or the pointer passed in at every call site is guaranteed to be
+ // valid.
+ // In the former case, invalid loads can happen, but would have happened
+ // anyway, in the latter case, invalid loads won't happen. This prevents us
+ // from introducing an invalid load that wouldn't have happened in the
+ // original code.
+ //
+ // This set will contain all sets of indices that are loaded in the entry
+ // block, and thus are safe to unconditionally load in the caller.
+ GEPIndicesSet SafeToUnconditionallyLoad;
+
+ // This set contains all the sets of indices that we are planning to promote.
+ // This makes it possible to limit the number of arguments added.
+ GEPIndicesSet ToPromote;
+
+ // If the pointer is always valid, any load with first index 0 is valid.
+ if (isByVal || AllCalleesPassInValidPointerForArgument(Arg))
+ SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+
+ // First, iterate the entry block and mark loads of (geps of) arguments as
+ // safe.
+ BasicBlock *EntryBlock = Arg->getParent()->begin();
+ // Declare this here so we can reuse it
+ IndicesVector Indices;
+ for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end();
+ I != E; ++I)
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ Value *V = LI->getPointerOperand();
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ V = GEP->getPointerOperand();
+ if (V == Arg) {
+ // This load actually loads (part of) Arg? Check the indices then.
+ Indices.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
+ Indices.push_back(CI->getSExtValue());
+ else
+ // We found a non-constant GEP index for this argument? Bail out
+ // right away, can't promote this argument at all.
+ return false;
+
+ // Indices checked out, mark them as safe
+ MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
+ Indices.clear();
+ }
+ } else if (V == Arg) {
+ // Direct loads are equivalent to a GEP with a single 0 index.
+ MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+ }
+ }
+
+ // Now, iterate all uses of the argument to see if there are any uses that are
+ // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
+ SmallVector<LoadInst*, 16> Loads;
+ IndicesVector Operands;
+ for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ Operands.clear();
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile()) return false; // Don't hack volatile loads
+ Loads.push_back(LI);
+ // Direct loads are equivalent to a GEP with a zero index and then a load.
+ Operands.push_back(0);
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (GEP->use_empty()) {
+ // Dead GEP's cause trouble later. Just remove them if we run into
+ // them.
+ getAnalysis<AliasAnalysis>().deleteValue(GEP);
+ GEP->eraseFromParent();
+ // TODO: This runs the above loop over and over again for dead GEPs
+ // Couldn't we just do increment the UI iterator earlier and erase the
+ // use?
+ return isSafeToPromoteArgument(Arg, isByVal);
+ }
+
+ // Ensure that all of the indices are constants.
+ for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
+ i != e; ++i)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
+ Operands.push_back(C->getSExtValue());
+ else
+ return false; // Not a constant operand GEP!
+
+ // Ensure that the only users of the GEP are load instructions.
+ for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
+ UI != E; ++UI)
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ if (LI->isVolatile()) return false; // Don't hack volatile loads
+ Loads.push_back(LI);
+ } else {
+ // Other uses than load?
+ return false;
+ }
+ } else {
+ return false; // Not a load or a GEP.
+ }
+
+ // Now, see if it is safe to promote this load / loads of this GEP. Loading
+ // is safe if Operands, or a prefix of Operands, is marked as safe.
+ if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
+ return false;
+
+ // See if we are already promoting a load with these indices. If not, check
+ // to make sure that we aren't promoting too many elements. If so, nothing
+ // to do.
+ if (ToPromote.find(Operands) == ToPromote.end()) {
+ if (maxElements > 0 && ToPromote.size() == maxElements) {
+ DEBUG(dbgs() << "argpromotion not promoting argument '"
+ << Arg->getName() << "' because it would require adding more "
+ << "than " << maxElements << " arguments to the function.\n");
+ // We limit aggregate promotion to only promoting up to a fixed number
+ // of elements of the aggregate.
+ return false;
+ }
+ ToPromote.insert(Operands);
+ }
+ }
+
+ if (Loads.empty()) return true; // No users, this is a dead argument.
+
+ // Okay, now we know that the argument is only used by load instructions and
+ // it is safe to unconditionally perform all of them. Use alias analysis to
+ // check to see if the pointer is guaranteed to not be modified from entry of
+ // the function to each of the load instructions.
+
+ // Because there could be several/many load instructions, remember which
+ // blocks we know to be transparent to the load.
+ SmallPtrSet<BasicBlock*, 16> TranspBlocks;
+
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ if (!TD) return false; // Without TargetData, assume the worst.
+
+ for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
+ // Check to see if the load is invalidated from the start of the block to
+ // the load itself.
+ LoadInst *Load = Loads[i];
+ BasicBlock *BB = Load->getParent();
+
+ const PointerType *LoadTy =
+ cast<PointerType>(Load->getPointerOperand()->getType());
+ unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType());
+
+ if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize))
+ return false; // Pointer is invalidated!
+
+ // Now check every path from the entry block to the load for transparency.
+ // To do this, we perform a depth first search on the inverse CFG from the
+ // loading block.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
+ I = idf_ext_begin(P, TranspBlocks),
+ E = idf_ext_end(P, TranspBlocks); I != E; ++I)
+ if (AA.canBasicBlockModify(**I, Arg, LoadSize))
+ return false;
+ }
+ }
+
+ // If the path from the entry of the function to each load is free of
+ // instructions that potentially invalidate the load, we can make the
+ // transformation!
+ return true;
+}
+
+/// DoPromotion - This method actually performs the promotion of the specified
+/// arguments, and returns the new function. At this point, we know that it's
+/// safe to do so.
+CallGraphNode *ArgPromotion::DoPromotion(Function *F,
+ SmallPtrSet<Argument*, 8> &ArgsToPromote,
+ SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has modified arguments.
+ const FunctionType *FTy = F->getFunctionType();
+ std::vector<const Type*> Params;
+
+ typedef std::set<IndicesVector> ScalarizeTable;
+
+ // ScalarizedElements - If we are promoting a pointer that has elements
+ // accessed out of it, keep track of which elements are accessed so that we
+ // can add one argument for each.
+ //
+ // Arguments that are directly loaded will have a zero element value here, to
+ // handle cases where there are both a direct load and GEP accesses.
+ //
+ std::map<Argument*, ScalarizeTable> ScalarizedElements;
+
+ // OriginalLoads - Keep track of a representative load instruction from the
+ // original function so that we can tell the alias analysis implementation
+ // what the new GEP/Load instructions we are inserting look like.
+ std::map<IndicesVector, LoadInst*> OriginalLoads;
+
+ // Attributes - Keep track of the parameter attributes for the arguments
+ // that we are *not* promoting. For the ones that we do promote, the parameter
+ // attributes are lost
+ SmallVector<AttributeWithIndex, 8> AttributesVec;
+ const AttrListPtr &PAL = F->getAttributes();
+
+ // Add any return attributes.
+ if (Attributes attrs = PAL.getRetAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+
+ // First, determine the new argument list
+ unsigned ArgIndex = 1;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++ArgIndex) {
+ if (ByValArgsToTransform.count(I)) {
+ // Simple byval argument? Just add all the struct element types.
+ const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ const StructType *STy = cast<StructType>(AgTy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Params.push_back(STy->getElementType(i));
+ ++NumByValArgsPromoted;
+ } else if (!ArgsToPromote.count(I)) {
+ // Unchanged argument
+ Params.push_back(I->getType());
+ if (Attributes attrs = PAL.getParamAttributes(ArgIndex))
+ AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
+ } else if (I->use_empty()) {
+ // Dead argument (which are always marked as promotable)
+ ++NumArgumentsDead;
+ } else {
+ // Okay, this is being promoted. This means that the only uses are loads
+ // or GEPs which are only used by loads
+
+ // In this table, we will track which indices are loaded from the argument
+ // (where direct loads are tracked as no indices).
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User));
+ IndicesVector Indices;
+ Indices.reserve(User->getNumOperands() - 1);
+ // Since loads will only have a single operand, and GEPs only a single
+ // non-index operand, this will record direct loads without any indices,
+ // and gep+loads with the GEP indices.
+ for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end();
+ II != IE; ++II)
+ Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Indices.size() == 1 && Indices.front() == 0)
+ Indices.clear();
+ ArgIndices.insert(Indices);
+ LoadInst *OrigLoad;
+ if (LoadInst *L = dyn_cast<LoadInst>(User))
+ OrigLoad = L;
+ else
+ // Take any load, we will use it only to update Alias Analysis
+ OrigLoad = cast<LoadInst>(User->use_back());
+ OriginalLoads[Indices] = OrigLoad;
+ }
+
+ // Add a parameter to the function for each element passed in.
+ for (ScalarizeTable::iterator SI = ArgIndices.begin(),
+ E = ArgIndices.end(); SI != E; ++SI) {
+ // not allowed to dereference ->begin() if size() is 0
+ Params.push_back(GetElementPtrInst::getIndexedType(I->getType(),
+ SI->begin(),
+ SI->end()));
+ assert(Params.back());
+ }
+
+ if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
+ ++NumArgumentsPromoted;
+ else
+ ++NumAggregatesPromoted;
+ }
+ }
+
+ // Add any function attributes.
+ if (Attributes attrs = PAL.getFnAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+
+ const Type *RetTy = FTy->getReturnType();
+
+ // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
+ // have zero fixed arguments.
+ bool ExtraArgHack = false;
+ if (Params.empty() && FTy->isVarArg()) {
+ ExtraArgHack = true;
+ Params.push_back(Type::getInt32Ty(F->getContext()));
+ }
+
+ // Construct the new function type using the new arguments.
+ FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
+
+ // Create the new function body and insert it into the module.
+ Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
+ NF->copyAttributesFrom(F);
+
+
+ DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
+ << "From: " << *F);
+
+ // Recompute the parameter attributes list based on the new arguments for
+ // the function.
+ NF->setAttributes(AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end()));
+ AttributesVec.clear();
+
+ F->getParent()->getFunctionList().insert(F, NF);
+ NF->takeName(F);
+
+ // Get the alias analysis information that we need to update to reflect our
+ // changes.
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ // Get the callgraph information that we need to update to reflect our
+ // changes.
+ CallGraph &CG = getAnalysis<CallGraph>();
+
+ // Get a new callgraph node for NF.
+ CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in the loaded pointers.
+ //
+ SmallVector<Value*, 16> Args;
+ while (!F->use_empty()) {
+ CallSite CS(F->use_back());
+ assert(CS.getCalledFunction() == F);
+ Instruction *Call = CS.getInstruction();
+ const AttrListPtr &CallPAL = CS.getAttributes();
+
+ // Add any return attributes.
+ if (Attributes attrs = CallPAL.getRetAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+
+ // Loop over the operands, inserting GEP and loads in the caller as
+ // appropriate.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ ArgIndex = 1;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++AI, ++ArgIndex)
+ if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ Args.push_back(*AI); // Unmodified argument
+
+ if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+
+ } else if (ByValArgsToTransform.count(I)) {
+ // Emit a GEP and load for each element of the struct.
+ const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ const StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2,
+ (*AI)->getName()+"."+utostr(i),
+ Call);
+ // TODO: Tell AA about the new values?
+ Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
+ }
+ } else if (!I->use_empty()) {
+ // Non-dead argument: insert GEPs and loads as appropriate.
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+ // Store the Value* version of the indices in here, but declare it now
+ // for reuse.
+ std::vector<Value*> Ops;
+ for (ScalarizeTable::iterator SI = ArgIndices.begin(),
+ E = ArgIndices.end(); SI != E; ++SI) {
+ Value *V = *AI;
+ LoadInst *OrigLoad = OriginalLoads[*SI];
+ if (!SI->empty()) {
+ Ops.reserve(SI->size());
+ const Type *ElTy = V->getType();
+ for (IndicesVector::const_iterator II = SI->begin(),
+ IE = SI->end(); II != IE; ++II) {
+ // Use i32 to index structs, and i64 for others (pointers/arrays).
+ // This satisfies GEP constraints.
+ const Type *IdxTy = (ElTy->isStructTy() ?
+ Type::getInt32Ty(F->getContext()) :
+ Type::getInt64Ty(F->getContext()));
+ Ops.push_back(ConstantInt::get(IdxTy, *II));
+ // Keep track of the type we're currently indexing.
+ ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
+ }
+ // And create a GEP to extract those indices.
+ V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(),
+ V->getName()+".idx", Call);
+ Ops.clear();
+ AA.copyValue(OrigLoad->getOperand(0), V);
+ }
+ // Since we're replacing a load make sure we take the alignment
+ // of the previous load.
+ LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
+ newLoad->setAlignment(OrigLoad->getAlignment());
+ Args.push_back(newLoad);
+ AA.copyValue(OrigLoad, Args.back());
+ }
+ }
+
+ if (ExtraArgHack)
+ Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));
+
+ // Push any varargs arguments on the list.
+ for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
+ Args.push_back(*AI);
+ if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ }
+
+ // Add any function attributes.
+ if (Attributes attrs = CallPAL.getFnAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args.begin(), Args.end(), "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end()));
+ } else {
+ New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end()));
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ Args.clear();
+ AttributesVec.clear();
+
+ // Update the alias analysis implementation to know that we are replacing
+ // the old call with a new one.
+ AA.replaceWithNewValue(Call, New);
+
+ // Update the callgraph to know that the callsite has been transformed.
+ CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
+ CalleeNode->replaceCallEdge(Call, New, NF_CGN);
+
+ if (!Call->use_empty()) {
+ Call->replaceAllUsesWith(New);
+ New->takeName(Call);
+ }
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+ // Loop over the argument list, transfering uses of the old arguments over to
+ // the new arguments, also transfering over the names as well.
+ //
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I) {
+ if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
+ // If this is an unmodified argument, move the name and users over to the
+ // new version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ AA.replaceWithNewValue(I, I2);
+ ++I2;
+ continue;
+ }
+
+ if (ByValArgsToTransform.count(I)) {
+ // In the callee, we create an alloca, and store each of the new incoming
+ // arguments into the alloca.
+ Instruction *InsertPt = NF->begin()->begin();
+
+ // Just add all the struct element types.
+ const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
+ const StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx =
+ GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
+ TheAlloca->getName()+"."+Twine(i),
+ InsertPt);
+ I2->setName(I->getName()+"."+Twine(i));
+ new StoreInst(I2++, Idx, InsertPt);
+ }
+
+ // Anything that used the arg should now use the alloca.
+ I->replaceAllUsesWith(TheAlloca);
+ TheAlloca->takeName(I);
+ AA.replaceWithNewValue(I, TheAlloca);
+ continue;
+ }
+
+ if (I->use_empty()) {
+ AA.deleteValue(I);
+ continue;
+ }
+
+ // Otherwise, if we promoted this argument, then all users are load
+ // instructions (or GEPs with only load users), and all loads should be
+ // using the new argument that we added.
+ ScalarizeTable &ArgIndices = ScalarizedElements[I];
+
+ while (!I->use_empty()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) {
+ assert(ArgIndices.begin()->empty() &&
+ "Load element should sort to front!");
+ I2->setName(I->getName()+".val");
+ LI->replaceAllUsesWith(I2);
+ AA.replaceWithNewValue(LI, I2);
+ LI->eraseFromParent();
+ DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
+ << "' in function '" << F->getName() << "'\n");
+ } else {
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
+ IndicesVector Operands;
+ Operands.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());
+
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Operands.size() == 1 && Operands.front() == 0)
+ Operands.clear();
+
+ Function::arg_iterator TheArg = I2;
+ for (ScalarizeTable::iterator It = ArgIndices.begin();
+ *It != Operands; ++It, ++TheArg) {
+ assert(It != ArgIndices.end() && "GEP not handled??");
+ }
+
+ std::string NewName = I->getName();
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ NewName += "." + utostr(Operands[i]);
+ }
+ NewName += ".val";
+ TheArg->setName(NewName);
+
+ DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
+ << "' of function '" << NF->getName() << "'\n");
+
+ // All of the uses must be load instructions. Replace them all with
+ // the argument specified by ArgNo.
+ while (!GEP->use_empty()) {
+ LoadInst *L = cast<LoadInst>(GEP->use_back());
+ L->replaceAllUsesWith(TheArg);
+ AA.replaceWithNewValue(L, TheArg);
+ L->eraseFromParent();
+ }
+ AA.deleteValue(GEP);
+ GEP->eraseFromParent();
+ }
+ }
+
+ // Increment I2 past all of the arguments added for this promoted pointer.
+ for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i)
+ ++I2;
+ }
+
+ // Notify the alias analysis implementation that we inserted a new argument.
+ if (ExtraArgHack)
+ AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())),
+ NF->arg_begin());
+
+
+ // Tell the alias analysis that the old function is about to disappear.
+ AA.replaceWithNewValue(F, NF);
+
+
+ NF_CGN->stealCalledFunctionsFrom(CG[F]);
+
+ // Now that the old function is dead, delete it. If there is a dangling
+ // reference to the CallgraphNode, just leave the dead function around for
+ // someone else to nuke.
+ CallGraphNode *CGN = CG[F];
+ if (CGN->getNumReferences() == 0)
+ delete CG.removeFunctionFromModule(CGN);
+ else
+ F->setLinkage(Function::ExternalLinkage);
+
+ return NF_CGN;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/CMakeLists.txt b/contrib/llvm/lib/Transforms/IPO/CMakeLists.txt
new file mode 100644
index 0000000..65483e8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -0,0 +1,27 @@
+add_llvm_library(LLVMipo
+ ArgumentPromotion.cpp
+ ConstantMerge.cpp
+ DeadArgumentElimination.cpp
+ DeadTypeElimination.cpp
+ ExtractGV.cpp
+ FunctionAttrs.cpp
+ GlobalDCE.cpp
+ GlobalOpt.cpp
+ IPConstantPropagation.cpp
+ IPO.cpp
+ InlineAlways.cpp
+ InlineSimple.cpp
+ Inliner.cpp
+ Internalize.cpp
+ LoopExtractor.cpp
+ LowerSetJmp.cpp
+ MergeFunctions.cpp
+ PartialInlining.cpp
+ PartialSpecialization.cpp
+ PruneEH.cpp
+ StripDeadPrototypes.cpp
+ StripSymbols.cpp
+ StructRetPromotion.cpp
+ )
+
+target_link_libraries (LLVMipo LLVMScalarOpts LLVMInstCombine)
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
new file mode 100644
index 0000000..64e8d79
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -0,0 +1,142 @@
+//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface to a pass that merges duplicate global
+// constants together into a single constant that is shared. This is useful
+// because some passes (ie TraceValues) insert a lot of string constants into
+// the program, regardless of whether or not an existing string is available.
+//
+// Algorithm: ConstantMerge is designed to build up a map of available constants
+// and eliminate duplicates when it is initialized.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "constmerge"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumMerged, "Number of global constants merged");
+
+namespace {
+ struct ConstantMerge : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ ConstantMerge() : ModulePass(ID) {}
+
+ // run - For this pass, process all of the globals in the module,
+ // eliminating duplicate constants.
+ //
+ bool runOnModule(Module &M);
+ };
+}
+
+char ConstantMerge::ID = 0;
+INITIALIZE_PASS(ConstantMerge, "constmerge",
+ "Merge Duplicate Global Constants", false, false);
+
+ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
+
+
+
+/// Find values that are marked as llvm.used.
+static void FindUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ if (LLVMUsed == 0) return;
+ ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+ if (Inits == 0) return;
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ UsedValues.insert(GV);
+}
+
+bool ConstantMerge::runOnModule(Module &M) {
+ // Find all the globals that are marked "used". These cannot be merged.
+ SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
+ FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
+ FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
+
+ // Map unique constant/section pairs to globals. We don't want to merge
+ // globals in different sections.
+ DenseMap<Constant*, GlobalVariable*> CMap;
+
+ // Replacements - This vector contains a list of replacements to perform.
+ SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
+
+ bool MadeChange = false;
+
+ // Iterate constant merging while we are still making progress. Merging two
+ // constants together may allow us to merge other constants together if the
+ // second level constants have initializers which point to the globals that
+ // were just merged.
+ while (1) {
+ // First pass: identify all globals that can be merged together, filling in
+ // the Replacements vector. We cannot do the replacement in this pass
+ // because doing so may cause initializers of other globals to be rewritten,
+ // invalidating the Constant* pointers in CMap.
+ //
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+
+ // If this GV is dead, remove it.
+ GV->removeDeadConstantUsers();
+ if (GV->use_empty() && GV->hasLocalLinkage()) {
+ GV->eraseFromParent();
+ continue;
+ }
+
+ // Only process constants with initializers in the default addres space.
+ if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV))
+ continue;
+
+
+
+ Constant *Init = GV->getInitializer();
+
+ // Check to see if the initializer is already known.
+ GlobalVariable *&Slot = CMap[Init];
+
+ if (Slot == 0) { // Nope, add it to the map.
+ Slot = GV;
+ } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate!
+ // Make all uses of the duplicate constant use the canonical version.
+ Replacements.push_back(std::make_pair(GV, Slot));
+ }
+ }
+
+ if (Replacements.empty())
+ return MadeChange;
+ CMap.clear();
+
+ // Now that we have figured out which replacements must be made, do them all
+ // now. This avoid invalidating the pointers in CMap, which are unneeded
+ // now.
+ for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
+ // Eliminate any uses of the dead global.
+ Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
+
+ // Delete the global value from the module.
+ Replacements[i].first->eraseFromParent();
+ }
+
+ NumMerged += Replacements.size();
+ Replacements.clear();
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
new file mode 100644
index 0000000..47df235
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -0,0 +1,939 @@
+//===-- DeadArgumentElimination.cpp - Eliminate dead arguments ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass deletes dead arguments from internal functions. Dead argument
+// elimination removes arguments which are directly dead, as well as arguments
+// only passed into function calls as dead arguments of other functions. This
+// pass also deletes dead return values in a similar way.
+//
+// This pass is often useful as a cleanup pass to run after aggressive
+// interprocedural passes, which add possibly-dead arguments or return values.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "deadargelim"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constant.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include <map>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
+STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
+
+namespace {
+ /// DAE - The dead argument elimination pass.
+ ///
+ class DAE : public ModulePass {
+ public:
+
+ /// Struct that represents (part of) either a return value or a function
+ /// argument. Used so that arguments and return values can be used
+ /// interchangably.
+ struct RetOrArg {
+ RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx),
+ IsArg(IsArg) {}
+ const Function *F;
+ unsigned Idx;
+ bool IsArg;
+
+ /// Make RetOrArg comparable, so we can put it into a map.
+ bool operator<(const RetOrArg &O) const {
+ if (F != O.F)
+ return F < O.F;
+ else if (Idx != O.Idx)
+ return Idx < O.Idx;
+ else
+ return IsArg < O.IsArg;
+ }
+
+ /// Make RetOrArg comparable, so we can easily iterate the multimap.
+ bool operator==(const RetOrArg &O) const {
+ return F == O.F && Idx == O.Idx && IsArg == O.IsArg;
+ }
+
+ std::string getDescription() const {
+ return std::string((IsArg ? "Argument #" : "Return value #"))
+ + utostr(Idx) + " of function " + F->getNameStr();
+ }
+ };
+
+ /// Liveness enum - During our initial pass over the program, we determine
+ /// that things are either alive or maybe alive. We don't mark anything
+ /// explicitly dead (even if we know they are), since anything not alive
+ /// with no registered uses (in Uses) will never be marked alive and will
+ /// thus become dead in the end.
+ enum Liveness { Live, MaybeLive };
+
+ /// Convenience wrapper
+ RetOrArg CreateRet(const Function *F, unsigned Idx) {
+ return RetOrArg(F, Idx, false);
+ }
+ /// Convenience wrapper
+ RetOrArg CreateArg(const Function *F, unsigned Idx) {
+ return RetOrArg(F, Idx, true);
+ }
+
+ typedef std::multimap<RetOrArg, RetOrArg> UseMap;
+ /// This maps a return value or argument to any MaybeLive return values or
+ /// arguments it uses. This allows the MaybeLive values to be marked live
+ /// when any of its users is marked live.
+ /// For example (indices are left out for clarity):
+ /// - Uses[ret F] = ret G
+ /// This means that F calls G, and F returns the value returned by G.
+ /// - Uses[arg F] = ret G
+ /// This means that some function calls G and passes its result as an
+ /// argument to F.
+ /// - Uses[ret F] = arg F
+ /// This means that F returns one of its own arguments.
+ /// - Uses[arg F] = arg G
+ /// This means that G calls F and passes one of its own (G's) arguments
+ /// directly to F.
+ UseMap Uses;
+
+ typedef std::set<RetOrArg> LiveSet;
+ typedef std::set<const Function*> LiveFuncSet;
+
+ /// This set contains all values that have been determined to be live.
+ LiveSet LiveValues;
+ /// This set contains all values that are cannot be changed in any way.
+ LiveFuncSet LiveFunctions;
+
+ typedef SmallVector<RetOrArg, 5> UseVector;
+
+ protected:
+ // DAH uses this to specify a different ID.
+ explicit DAE(char &ID) : ModulePass(ID) {}
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DAE() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M);
+
+ virtual bool ShouldHackArguments() const { return false; }
+
+ private:
+ Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
+ Liveness SurveyUse(Value::const_use_iterator U, UseVector &MaybeLiveUses,
+ unsigned RetValNum = 0);
+ Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
+
+ void SurveyFunction(const Function &F);
+ void MarkValue(const RetOrArg &RA, Liveness L,
+ const UseVector &MaybeLiveUses);
+ void MarkLive(const RetOrArg &RA);
+ void MarkLive(const Function &F);
+ void PropagateLiveness(const RetOrArg &RA);
+ bool RemoveDeadStuffFromFunction(Function *F);
+ bool DeleteDeadVarargs(Function &Fn);
+ };
+}
+
+
+char DAE::ID = 0;
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false);
+
+namespace {
+ /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
+ /// deletes arguments to functions which are external. This is only for use
+ /// by bugpoint.
+ struct DAH : public DAE {
+ static char ID;
+ DAH() : DAE(ID) {}
+
+ virtual bool ShouldHackArguments() const { return true; }
+ };
+}
+
+char DAH::ID = 0;
+INITIALIZE_PASS(DAH, "deadarghaX0r",
+ "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
+ false, false);
+
+/// createDeadArgEliminationPass - This pass removes arguments from functions
+/// which are not used by the body of the function.
+///
+ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); }
+ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
+
+/// DeleteDeadVarargs - If this is an function that takes a ... list, and if
+/// llvm.vastart is never called, the varargs list is dead for the function.
+bool DAE::DeleteDeadVarargs(Function &Fn) {
+ assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!");
+ if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
+
+ // Ensure that the function is only directly called.
+ if (Fn.hasAddressTaken())
+ return false;
+
+ // Okay, we know we can transform this function if safe. Scan its body
+ // looking for calls to llvm.vastart.
+ for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::vastart)
+ return false;
+ }
+ }
+ }
+
+ // If we get here, there are no calls to llvm.vastart in the function body,
+ // remove the "..." and adjust all the calls.
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but doesn't have isVarArg set.
+ const FunctionType *FTy = Fn.getFunctionType();
+
+ std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end());
+ FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
+ Params, false);
+ unsigned NumArgs = Params.size();
+
+ // Create the new function body and insert it into the module...
+ Function *NF = Function::Create(NFTy, Fn.getLinkage());
+ NF->copyAttributesFrom(&Fn);
+ Fn.getParent()->getFunctionList().insert(&Fn, NF);
+ NF->takeName(&Fn);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in a smaller number of arguments into the new function.
+ //
+ std::vector<Value*> Args;
+ while (!Fn.use_empty()) {
+ CallSite CS(Fn.use_back());
+ Instruction *Call = CS.getInstruction();
+
+ // Pass all the same arguments.
+ Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
+
+ // Drop any attributes that were on the vararg arguments.
+ AttrListPtr PAL = CS.getAttributes();
+ if (!PAL.isEmpty() && PAL.getSlot(PAL.getNumSlots() - 1).Index > NumArgs) {
+ SmallVector<AttributeWithIndex, 8> AttributesVec;
+ for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i)
+ AttributesVec.push_back(PAL.getSlot(i));
+ if (Attributes FnAttrs = PAL.getFnAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+ PAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end());
+ }
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args.begin(), Args.end(), "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(PAL);
+ } else {
+ New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(PAL);
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ New->setDebugLoc(Call->getDebugLoc());
+
+ Args.clear();
+
+ if (!Call->use_empty())
+ Call->replaceAllUsesWith(New);
+
+ New->takeName(Call);
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList());
+
+ // Loop over the argument list, transfering uses of the old arguments over to
+ // the new arguments, also transfering over the names as well. While we're at
+ // it, remove the dead arguments from the DeadArguments list.
+ //
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I, ++I2) {
+ // Move the name and users over to the new version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ }
+
+ // Finally, nuke the old function.
+ Fn.eraseFromParent();
+ return true;
+}
+
+/// Convenience function that returns the number of return values. It returns 0
+/// for void functions and 1 for functions not returning a struct. It returns
+/// the number of struct elements for functions returning a struct.
+static unsigned NumRetVals(const Function *F) {
+ if (F->getReturnType()->isVoidTy())
+ return 0;
+ else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType()))
+ return STy->getNumElements();
+ else
+ return 1;
+}
+
+/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
+/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
+/// liveness of Use.
+DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) {
+ // We're live if our use or its Function is already marked as live.
+ if (LiveFunctions.count(Use.F) || LiveValues.count(Use))
+ return Live;
+
+ // We're maybe live otherwise, but remember that we must become live if
+ // Use becomes live.
+ MaybeLiveUses.push_back(Use);
+ return MaybeLive;
+}
+
+
+/// SurveyUse - This looks at a single use of an argument or return value
+/// and determines if it should be alive or not. Adds this use to MaybeLiveUses
+/// if it causes the used value to become MaybeLive.
+///
+/// RetValNum is the return value number to use when this use is used in a
+/// return instruction. This is used in the recursion, you should always leave
+/// it at 0.
+DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U,
+ UseVector &MaybeLiveUses, unsigned RetValNum) {
+ const User *V = *U;
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
+ // The value is returned from a function. It's only live when the
+ // function's return value is live. We use RetValNum here, for the case
+ // that U is really a use of an insertvalue instruction that uses the
+ // orginal Use.
+ RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
+ // We might be live, depending on the liveness of Use.
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ }
+ if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
+ if (U.getOperandNo() != InsertValueInst::getAggregateOperandIndex()
+ && IV->hasIndices())
+ // The use we are examining is inserted into an aggregate. Our liveness
+ // depends on all uses of that aggregate, but if it is used as a return
+ // value, only index at which we were inserted counts.
+ RetValNum = *IV->idx_begin();
+
+ // Note that if we are used as the aggregate operand to the insertvalue,
+ // we don't change RetValNum, but do survey all our uses.
+
+ Liveness Result = MaybeLive;
+ for (Value::const_use_iterator I = IV->use_begin(),
+ E = V->use_end(); I != E; ++I) {
+ Result = SurveyUse(I, MaybeLiveUses, RetValNum);
+ if (Result == Live)
+ break;
+ }
+ return Result;
+ }
+
+ if (ImmutableCallSite CS = V) {
+ const Function *F = CS.getCalledFunction();
+ if (F) {
+ // Used in a direct call.
+
+ // Find the argument number. We know for sure that this use is an
+ // argument, since if it was the function argument this would be an
+ // indirect call and the we know can't be looking at a value of the
+ // label type (for the invoke instruction).
+ unsigned ArgNo = CS.getArgumentNo(U);
+
+ if (ArgNo >= F->getFunctionType()->getNumParams())
+ // The value is passed in through a vararg! Must be live.
+ return Live;
+
+ assert(CS.getArgument(ArgNo)
+ == CS->getOperand(U.getOperandNo())
+ && "Argument is not where we expected it");
+
+ // Value passed to a normal call. It's only live when the corresponding
+ // argument to the called function turns out live.
+ RetOrArg Use = CreateArg(F, ArgNo);
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ }
+ }
+ // Used in any other way? Value must be live.
+ return Live;
+}
+
+/// SurveyUses - This looks at all the uses of the given value
+/// Returns the Liveness deduced from the uses of this value.
+///
+/// Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If
+/// the result is Live, MaybeLiveUses might be modified but its content should
+/// be ignored (since it might not be complete).
+DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) {
+ // Assume it's dead (which will only hold if there are no uses at all..).
+ Liveness Result = MaybeLive;
+ // Check each use.
+ for (Value::const_use_iterator I = V->use_begin(),
+ E = V->use_end(); I != E; ++I) {
+ Result = SurveyUse(I, MaybeLiveUses);
+ if (Result == Live)
+ break;
+ }
+ return Result;
+}
+
+// SurveyFunction - This performs the initial survey of the specified function,
+// checking out whether or not it uses any of its incoming arguments or whether
+// any callers use the return value. This fills in the LiveValues set and Uses
+// map.
+//
+// We consider arguments of non-internal functions to be intrinsically alive as
+// well as arguments to functions which have their "address taken".
+//
+void DAE::SurveyFunction(const Function &F) {
+ unsigned RetCount = NumRetVals(&F);
+ // Assume all return values are dead
+ typedef SmallVector<Liveness, 5> RetVals;
+ RetVals RetValLiveness(RetCount, MaybeLive);
+
+ typedef SmallVector<UseVector, 5> RetUses;
+ // These vectors map each return value to the uses that make it MaybeLive, so
+ // we can add those to the Uses map if the return value really turns out to be
+ // MaybeLive. Initialized to a list of RetCount empty lists.
+ RetUses MaybeLiveRetUses(RetCount);
+
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType()
+ != F.getFunctionType()->getReturnType()) {
+ // We don't support old style multiple return values.
+ MarkLive(F);
+ return;
+ }
+
+ if (!F.hasLocalLinkage() && (!ShouldHackArguments() || F.isIntrinsic())) {
+ MarkLive(F);
+ return;
+ }
+
+ DEBUG(dbgs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n");
+ // Keep track of the number of live retvals, so we can skip checks once all
+ // of them turn out to be live.
+ unsigned NumLiveRetVals = 0;
+ const Type *STy = dyn_cast<StructType>(F.getReturnType());
+ // Loop all uses of the function.
+ for (Value::const_use_iterator I = F.use_begin(), E = F.use_end();
+ I != E; ++I) {
+ // If the function is PASSED IN as an argument, its address has been
+ // taken.
+ ImmutableCallSite CS(*I);
+ if (!CS || !CS.isCallee(I)) {
+ MarkLive(F);
+ return;
+ }
+
+ // If this use is anything other than a call site, the function is alive.
+ const Instruction *TheCall = CS.getInstruction();
+ if (!TheCall) { // Not a direct call site?
+ MarkLive(F);
+ return;
+ }
+
+ // If we end up here, we are looking at a direct call to our function.
+
+ // Now, check how our return value(s) is/are used in this caller. Don't
+ // bother checking return values if all of them are live already.
+ if (NumLiveRetVals != RetCount) {
+ if (STy) {
+ // Check all uses of the return value.
+ for (Value::const_use_iterator I = TheCall->use_begin(),
+ E = TheCall->use_end(); I != E; ++I) {
+ const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(*I);
+ if (Ext && Ext->hasIndices()) {
+ // This use uses a part of our return value, survey the uses of
+ // that part and store the results for this index only.
+ unsigned Idx = *Ext->idx_begin();
+ if (RetValLiveness[Idx] != Live) {
+ RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+ if (RetValLiveness[Idx] == Live)
+ NumLiveRetVals++;
+ }
+ } else {
+ // Used by something else than extractvalue. Mark all return
+ // values as live.
+ for (unsigned i = 0; i != RetCount; ++i )
+ RetValLiveness[i] = Live;
+ NumLiveRetVals = RetCount;
+ break;
+ }
+ }
+ } else {
+ // Single return value
+ RetValLiveness[0] = SurveyUses(TheCall, MaybeLiveRetUses[0]);
+ if (RetValLiveness[0] == Live)
+ NumLiveRetVals = RetCount;
+ }
+ }
+ }
+
+ // Now we've inspected all callers, record the liveness of our return values.
+ for (unsigned i = 0; i != RetCount; ++i)
+ MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
+
+ DEBUG(dbgs() << "DAE - Inspecting args for fn: " << F.getName() << "\n");
+
+ // Now, check all of our arguments.
+ unsigned i = 0;
+ UseVector MaybeLiveArgUses;
+ for (Function::const_arg_iterator AI = F.arg_begin(),
+ E = F.arg_end(); AI != E; ++AI, ++i) {
+ // See what the effect of this use is (recording any uses that cause
+ // MaybeLive in MaybeLiveArgUses).
+ Liveness Result = SurveyUses(AI, MaybeLiveArgUses);
+ // Mark the result.
+ MarkValue(CreateArg(&F, i), Result, MaybeLiveArgUses);
+ // Clear the vector again for the next iteration.
+ MaybeLiveArgUses.clear();
+ }
+}
+
+/// MarkValue - This function marks the liveness of RA depending on L. If L is
+/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in Uses,
+/// such that RA will be marked live if any use in MaybeLiveUses gets marked
+/// live later on.
+void DAE::MarkValue(const RetOrArg &RA, Liveness L,
+ const UseVector &MaybeLiveUses) {
+ switch (L) {
+ case Live: MarkLive(RA); break;
+ case MaybeLive:
+ {
+ // Note any uses of this value, so this return value can be
+ // marked live whenever one of the uses becomes live.
+ for (UseVector::const_iterator UI = MaybeLiveUses.begin(),
+ UE = MaybeLiveUses.end(); UI != UE; ++UI)
+ Uses.insert(std::make_pair(*UI, RA));
+ break;
+ }
+ }
+}
+
+/// MarkLive - Mark the given Function as alive, meaning that it cannot be
+/// changed in any way. Additionally,
+/// mark any values that are used as this function's parameters or by its return
+/// values (according to Uses) live as well.
+void DAE::MarkLive(const Function &F) {
+ DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");
+ // Mark the function as live.
+ LiveFunctions.insert(&F);
+ // Mark all arguments as live.
+ for (unsigned i = 0, e = F.arg_size(); i != e; ++i)
+ PropagateLiveness(CreateArg(&F, i));
+ // Mark all return values as live.
+ for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i)
+ PropagateLiveness(CreateRet(&F, i));
+}
+
+/// MarkLive - Mark the given return value or argument as live. Additionally,
+/// mark any values that are used by this value (according to Uses) live as
+/// well.
+void DAE::MarkLive(const RetOrArg &RA) {
+ if (LiveFunctions.count(RA.F))
+ return; // Function was already marked Live.
+
+ if (!LiveValues.insert(RA).second)
+ return; // We were already marked Live.
+
+ DEBUG(dbgs() << "DAE - Marking " << RA.getDescription() << " live\n");
+ PropagateLiveness(RA);
+}
+
+/// PropagateLiveness - Given that RA is a live value, propagate it's liveness
+/// to any other values it uses (according to Uses).
+void DAE::PropagateLiveness(const RetOrArg &RA) {
+ // We don't use upper_bound (or equal_range) here, because our recursive call
+ // to ourselves is likely to cause the upper_bound (which is the first value
+ // not belonging to RA) to become erased and the iterator invalidated.
+ UseMap::iterator Begin = Uses.lower_bound(RA);
+ UseMap::iterator E = Uses.end();
+ UseMap::iterator I;
+ for (I = Begin; I != E && I->first == RA; ++I)
+ MarkLive(I->second);
+
+ // Erase RA from the Uses map (from the lower bound to wherever we ended up
+ // after the loop).
+ Uses.erase(Begin, I);
+}
+
+// RemoveDeadStuffFromFunction - Remove any arguments and return values from F
+// that are not in LiveValues. Transform the function and all of the callees of
+// the function to not have these arguments and return values.
+//
+bool DAE::RemoveDeadStuffFromFunction(Function *F) {
+ // Don't modify fully live functions
+ if (LiveFunctions.count(F))
+ return false;
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has fewer arguments and a different return type.
+ const FunctionType *FTy = F->getFunctionType();
+ std::vector<const Type*> Params;
+
+ // Set up to build a new list of parameter attributes.
+ SmallVector<AttributeWithIndex, 8> AttributesVec;
+ const AttrListPtr &PAL = F->getAttributes();
+
+ // The existing function return attributes.
+ Attributes RAttrs = PAL.getRetAttributes();
+ Attributes FnAttrs = PAL.getFnAttributes();
+
+ // Find out the new return value.
+
+ const Type *RetTy = FTy->getReturnType();
+ const Type *NRetTy = NULL;
+ unsigned RetCount = NumRetVals(F);
+
+ // -1 means unused, other numbers are the new index
+ SmallVector<int, 5> NewRetIdxs(RetCount, -1);
+ std::vector<const Type*> RetTypes;
+ if (RetTy->isVoidTy()) {
+ NRetTy = RetTy;
+ } else {
+ const StructType *STy = dyn_cast<StructType>(RetTy);
+ if (STy)
+ // Look at each of the original return values individually.
+ for (unsigned i = 0; i != RetCount; ++i) {
+ RetOrArg Ret = CreateRet(F, i);
+ if (LiveValues.erase(Ret)) {
+ RetTypes.push_back(STy->getElementType(i));
+ NewRetIdxs[i] = RetTypes.size() - 1;
+ } else {
+ ++NumRetValsEliminated;
+ DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
+ << F->getName() << "\n");
+ }
+ }
+ else
+ // We used to return a single value.
+ if (LiveValues.erase(CreateRet(F, 0))) {
+ RetTypes.push_back(RetTy);
+ NewRetIdxs[0] = 0;
+ } else {
+ DEBUG(dbgs() << "DAE - Removing return value from " << F->getName()
+ << "\n");
+ ++NumRetValsEliminated;
+ }
+ if (RetTypes.size() > 1)
+ // More than one return type? Return a struct with them. Also, if we used
+ // to return a struct and didn't change the number of return values,
+ // return a struct again. This prevents changing {something} into
+ // something and {} into void.
+ // Make the new struct packed if we used to return a packed struct
+ // already.
+ NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
+ else if (RetTypes.size() == 1)
+ // One return type? Just a simple value then, but only if we didn't use to
+ // return a struct with that simple value before.
+ NRetTy = RetTypes.front();
+ else if (RetTypes.size() == 0)
+ // No return types? Make it void, but only if we didn't use to return {}.
+ NRetTy = Type::getVoidTy(F->getContext());
+ }
+
+ assert(NRetTy && "No new return type found?");
+
+ // Remove any incompatible attributes, but only if we removed all return
+ // values. Otherwise, ensure that we don't have any conflicting attributes
+ // here. Currently, this should not be possible, but special handling might be
+ // required when new return value attributes are added.
+ if (NRetTy->isVoidTy())
+ RAttrs &= ~Attribute::typeIncompatible(NRetTy);
+ else
+ assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0
+ && "Return attributes no longer compatible?");
+
+ if (RAttrs)
+ AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+
+ // Remember which arguments are still alive.
+ SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
+ // Construct the new parameter list from non-dead arguments. Also construct
+ // a new set of parameter attributes to correspond. Skip the first parameter
+ // attribute, since that belongs to the return value.
+ unsigned i = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++i) {
+ RetOrArg Arg = CreateArg(F, i);
+ if (LiveValues.erase(Arg)) {
+ Params.push_back(I->getType());
+ ArgAlive[i] = true;
+
+ // Get the original parameter attributes (skipping the first one, that is
+ // for the return value.
+ if (Attributes Attrs = PAL.getParamAttributes(i + 1))
+ AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));
+ } else {
+ ++NumArgumentsEliminated;
+ DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName()
+ << ") from " << F->getName() << "\n");
+ }
+ }
+
+ if (FnAttrs != Attribute::None)
+ AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ // Reconstruct the AttributesList based on the vector we constructed.
+ AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end());
+
+ // Create the new function type based on the recomputed parameters.
+ FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
+
+ // No change?
+ if (NFTy == FTy)
+ return false;
+
+ // Create the new function body and insert it into the module...
+ Function *NF = Function::Create(NFTy, F->getLinkage());
+ NF->copyAttributesFrom(F);
+ NF->setAttributes(NewPAL);
+ // Insert the new function before the old function, so we won't be processing
+ // it again.
+ F->getParent()->getFunctionList().insert(F, NF);
+ NF->takeName(F);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in a smaller number of arguments into the new function.
+ //
+ std::vector<Value*> Args;
+ while (!F->use_empty()) {
+ CallSite CS(F->use_back());
+ Instruction *Call = CS.getInstruction();
+
+ AttributesVec.clear();
+ const AttrListPtr &CallPAL = CS.getAttributes();
+
+ // The call return attributes.
+ Attributes RAttrs = CallPAL.getRetAttributes();
+ Attributes FnAttrs = CallPAL.getFnAttributes();
+ // Adjust in case the function was changed to return void.
+ RAttrs &= ~Attribute::typeIncompatible(NF->getReturnType());
+ if (RAttrs)
+ AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+
+ // Declare these outside of the loops, so we can reuse them for the second
+ // loop, which loops the varargs.
+ CallSite::arg_iterator I = CS.arg_begin();
+ unsigned i = 0;
+ // Loop over those operands, corresponding to the normal arguments to the
+ // original function, and add those that are still alive.
+ for (unsigned e = FTy->getNumParams(); i != e; ++I, ++i)
+ if (ArgAlive[i]) {
+ Args.push_back(*I);
+ // Get original parameter attributes, but skip return attributes.
+ if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ }
+
+ // Push any varargs arguments on the list. Don't forget their attributes.
+ for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
+ Args.push_back(*I);
+ if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+ AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ }
+
+ if (FnAttrs != Attribute::None)
+ AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ // Reconstruct the AttributesList based on the vector we constructed.
+ AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end());
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args.begin(), Args.end(), "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(NewCallPAL);
+ } else {
+ New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(NewCallPAL);
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ New->setDebugLoc(Call->getDebugLoc());
+
+ Args.clear();
+
+ if (!Call->use_empty()) {
+ if (New->getType() == Call->getType()) {
+ // Return type not changed? Just replace users then.
+ Call->replaceAllUsesWith(New);
+ New->takeName(Call);
+ } else if (New->getType()->isVoidTy()) {
+ // Our return value has uses, but they will get removed later on.
+ // Replace by null for now.
+ Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
+ } else {
+ assert(RetTy->isStructTy() &&
+ "Return type changed, but not into a void. The old return type"
+ " must have been a struct!");
+ Instruction *InsertPt = Call;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ BasicBlock::iterator IP = II->getNormalDest()->begin();
+ while (isa<PHINode>(IP)) ++IP;
+ InsertPt = IP;
+ }
+
+ // We used to return a struct. Instead of doing smart stuff with all the
+ // uses of this struct, we will just rebuild it using
+ // extract/insertvalue chaining and let instcombine clean that up.
+ //
+ // Start out building up our return value from undef
+ Value *RetVal = UndefValue::get(RetTy);
+ for (unsigned i = 0; i != RetCount; ++i)
+ if (NewRetIdxs[i] != -1) {
+ Value *V;
+ if (RetTypes.size() > 1)
+ // We are still returning a struct, so extract the value from our
+ // return value
+ V = ExtractValueInst::Create(New, NewRetIdxs[i], "newret",
+ InsertPt);
+ else
+ // We are now returning a single element, so just insert that
+ V = New;
+ // Insert the value at the old position
+ RetVal = InsertValueInst::Create(RetVal, V, i, "oldret", InsertPt);
+ }
+ // Now, replace all uses of the old call instruction with the return
+ // struct we built
+ Call->replaceAllUsesWith(RetVal);
+ New->takeName(Call);
+ }
+ }
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+ // Loop over the argument list, transfering uses of the old arguments over to
+ // the new arguments, also transfering over the names as well.
+ i = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I, ++i)
+ if (ArgAlive[i]) {
+ // If this is a live argument, move the name and users over to the new
+ // version.
+ I->replaceAllUsesWith(I2);
+ I2->takeName(I);
+ ++I2;
+ } else {
+ // If this argument is dead, replace any uses of it with null constants
+ // (these are guaranteed to become unused later on).
+ I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+ }
+
+ // If we change the return value of the function we must rewrite any return
+ // instructions. Check this now.
+ if (F->getReturnType() != NF->getReturnType())
+ for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ Value *RetVal;
+
+ if (NFTy->getReturnType()->isVoidTy()) {
+ RetVal = 0;
+ } else {
+ assert (RetTy->isStructTy());
+ // The original return value was a struct, insert
+ // extractvalue/insertvalue chains to extract only the values we need
+ // to return and insert them into our new result.
+ // This does generate messy code, but we'll let it to instcombine to
+ // clean that up.
+ Value *OldRet = RI->getOperand(0);
+ // Start out building up our return value from undef
+ RetVal = UndefValue::get(NRetTy);
+ for (unsigned i = 0; i != RetCount; ++i)
+ if (NewRetIdxs[i] != -1) {
+ ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i,
+ "oldret", RI);
+ if (RetTypes.size() > 1) {
+ // We're still returning a struct, so reinsert the value into
+ // our new return value at the new index
+
+ RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[i],
+ "newret", RI);
+ } else {
+ // We are now only returning a simple value, so just return the
+ // extracted value.
+ RetVal = EV;
+ }
+ }
+ }
+ // Replace the return instruction with one returning the new return
+ // value (possibly 0 if we became void).
+ ReturnInst::Create(F->getContext(), RetVal, RI);
+ BB->getInstList().erase(RI);
+ }
+
+ // Now that the old function is dead, delete it.
+ F->eraseFromParent();
+
+ return true;
+}
+
+bool DAE::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // First pass: Do a simple check to see if any functions can have their "..."
+ // removed. We can do this if they never call va_start. This loop cannot be
+ // fused with the next loop, because deleting a function invalidates
+ // information computed while surveying other functions.
+ DEBUG(dbgs() << "DAE - Deleting dead varargs\n");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+ Function &F = *I++;
+ if (F.getFunctionType()->isVarArg())
+ Changed |= DeleteDeadVarargs(F);
+ }
+
+ // Second phase:loop through the module, determining which arguments are live.
+ // We assume all arguments are dead unless proven otherwise (allowing us to
+ // determine that dead arguments passed into recursive functions are dead).
+ //
+ DEBUG(dbgs() << "DAE - Determining liveness\n");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ SurveyFunction(*I);
+
+ // Now, remove all dead arguments and return values from each function in
+ // turn.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+ // Increment now, because the function will probably get removed (ie.
+ // replaced by a new one).
+ Function *F = I++;
+ Changed |= RemoveDeadStuffFromFunction(F);
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp
new file mode 100644
index 0000000..5dc50c5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp
@@ -0,0 +1,106 @@
+//===- DeadTypeElimination.cpp - Eliminate unused types for symbol table --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to cleanup the output of GCC. It eliminate names for types
+// that are unused in the entire translation unit, using the FindUsedTypes pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "deadtypeelim"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
+
+namespace {
+ struct DTE : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ DTE() : ModulePass(ID) {}
+
+ // doPassInitialization - For this pass, it removes global symbol table
+ // entries for primitive types. These are never used for linking in GCC and
+ // they make the output uglier to look at, so we nuke them.
+ //
+ // Also, initialize instance variables.
+ //
+ bool runOnModule(Module &M);
+
+ // getAnalysisUsage - This function needs FindUsedTypes to do its job...
+ //
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<FindUsedTypes>();
+ }
+ };
+}
+
+char DTE::ID = 0;
+INITIALIZE_PASS(DTE, "deadtypeelim", "Dead Type Elimination", false, false);
+
+ModulePass *llvm::createDeadTypeEliminationPass() {
+ return new DTE();
+}
+
+
+// ShouldNukeSymtabEntry - Return true if this module level symbol table entry
+// should be eliminated.
+//
+static inline bool ShouldNukeSymtabEntry(const Type *Ty){
+ // Nuke all names for primitive types!
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy())
+ return true;
+
+ // Nuke all pointers to primitive types as well...
+ if (const PointerType *PT = dyn_cast<PointerType>(Ty))
+ if (PT->getElementType()->isPrimitiveType() ||
+ PT->getElementType()->isIntegerTy())
+ return true;
+
+ return false;
+}
+
+// run - For this pass, it removes global symbol table entries for primitive
+// types. These are never used for linking in GCC and they make the output
+// uglier to look at, so we nuke them. Also eliminate types that are never used
+// in the entire program as indicated by FindUsedTypes.
+//
+bool DTE::runOnModule(Module &M) {
+ bool Changed = false;
+
+ TypeSymbolTable &ST = M.getTypeSymbolTable();
+ std::set<const Type *> UsedTypes = getAnalysis<FindUsedTypes>().getTypes();
+
+ // Check the symbol table for superfluous type entries...
+ //
+ // Grab the 'type' plane of the module symbol...
+ TypeSymbolTable::iterator TI = ST.begin();
+ TypeSymbolTable::iterator TE = ST.end();
+ while ( TI != TE ) {
+ // If this entry should be unconditionally removed, or if we detect that
+ // the type is not used, remove it.
+ const Type *RHS = TI->second;
+ if (ShouldNukeSymtabEntry(RHS) || !UsedTypes.count(RHS)) {
+ ST.remove(TI++);
+ ++NumKilled;
+ Changed = true;
+ } else {
+ ++TI;
+ // We only need to leave one name for each type.
+ UsedTypes.erase(RHS);
+ }
+ }
+
+ return Changed;
+}
+
+// vim: sw=2
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
new file mode 100644
index 0000000..45c5fe7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -0,0 +1,82 @@
+//===-- ExtractGV.cpp - Global Value extraction pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass extracts global values
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Constants.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SetVector.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+ /// @brief A pass to extract specific functions and their dependencies.
+ class GVExtractorPass : public ModulePass {
+ SetVector<GlobalValue *> Named;
+ bool deleteStuff;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+ /// FunctionExtractorPass - If deleteFn is true, this pass deletes as the
+ /// specified function. Otherwise, it deletes as much of the module as
+ /// possible, except for the function specified.
+ ///
+ explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
+ : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
+
+ bool runOnModule(Module &M) {
+ // Visit the global inline asm.
+ if (!deleteStuff)
+ M.setModuleInlineAsm("");
+
+ // For simplicity, just give all GlobalValues ExternalLinkage. A trickier
+ // implementation could figure out which GlobalValues are actually
+ // referenced by the Named set, and which GlobalValues in the rest of
+ // the module are referenced by the NamedSet, and get away with leaving
+ // more internal and private things internal and private. But for now,
+ // be conservative and simple.
+
+ // Visit the GlobalVariables.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration()) {
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ if (deleteStuff == Named.count(I))
+ I->setInitializer(0);
+ }
+
+ // Visit the Functions.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration()) {
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ if (deleteStuff == Named.count(I))
+ I->deleteBody();
+ }
+
+ return true;
+ }
+ };
+
+ char GVExtractorPass::ID = 0;
+}
+
+ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs,
+ bool deleteFn) {
+ return new GVExtractorPass(GVs, deleteFn);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
new file mode 100644
index 0000000..6165ba0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -0,0 +1,391 @@
+//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interprocedural pass which walks the
+// call-graph, looking for functions which do not access or only read
+// non-local memory, and marking them readnone/readonly. In addition,
+// it marks function arguments (of pointer type) 'nocapture' if a call
+// to the function does not create any copies of the pointer value that
+// outlive the call. This more or less means that the pointer is only
+// dereferenced, and not returned from the function or stored in a global.
+// This pass is implemented as a bottom-up traversal of the call-graph.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "functionattrs"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Support/InstIterator.h"
+using namespace llvm;
+
+STATISTIC(NumReadNone, "Number of functions marked readnone");
+STATISTIC(NumReadOnly, "Number of functions marked readonly");
+STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
+STATISTIC(NumNoAlias, "Number of function returns marked noalias");
+
+namespace {
+ struct FunctionAttrs : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ FunctionAttrs() : CallGraphSCCPass(ID) {}
+
+ // runOnSCC - Analyze the SCC, performing the transformation if possible.
+ bool runOnSCC(CallGraphSCC &SCC);
+
+ // AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
+ bool AddReadAttrs(const CallGraphSCC &SCC);
+
+ // AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
+ bool AddNoCaptureAttrs(const CallGraphSCC &SCC);
+
+ // IsFunctionMallocLike - Does this function allocate new memory?
+ bool IsFunctionMallocLike(Function *F,
+ SmallPtrSet<Function*, 8> &) const;
+
+ // AddNoAliasAttrs - Deduce noalias attributes for the SCC.
+ bool AddNoAliasAttrs(const CallGraphSCC &SCC);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+ bool PointsToLocalMemory(Value *V);
+ };
+}
+
+char FunctionAttrs::ID = 0;
+INITIALIZE_PASS(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false);
+
+Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
+
+
+/// PointsToLocalMemory - Returns whether the given pointer value points to
+/// memory that is local to the function. Global constants are considered
+/// local to all functions.
+bool FunctionAttrs::PointsToLocalMemory(Value *V) {
+ SmallVector<Value*, 16> Worklist;
+ unsigned MaxLookup = 8;
+
+ Worklist.push_back(V);
+
+ do {
+ V = Worklist.pop_back_val()->getUnderlyingObject();
+
+ // An alloca instruction defines local memory.
+ if (isa<AllocaInst>(V))
+ continue;
+
+ // A global constant counts as local memory for our purposes.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ if (!GV->isConstant())
+ return false;
+ continue;
+ }
+
+ // If both select values point to local memory, then so does the select.
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ // If all values incoming to a phi node point to local memory, then so does
+ // the phi.
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ // Don't bother inspecting phi nodes with many operands.
+ if (PN->getNumIncomingValues() > MaxLookup)
+ return false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Worklist.push_back(PN->getIncomingValue(i));
+ continue;
+ }
+
+ return false;
+ } while (!Worklist.empty() && --MaxLookup);
+
+ return Worklist.empty();
+}
+
+/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
+bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
+ SmallPtrSet<Function*, 8> SCCNodes;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert((*I)->getFunction());
+
+ // Check if any of the functions in the SCC read or write memory. If they
+ // write memory then they can't be marked readnone or readonly.
+ bool ReadsMemory = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F == 0)
+ // External node - may write memory. Just give up.
+ return false;
+
+ if (F->doesNotAccessMemory())
+ // Already perfect!
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that writes memory, so treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden()) {
+ if (!F->onlyReadsMemory())
+ // May write memory. Just give up.
+ return false;
+
+ ReadsMemory = true;
+ continue;
+ }
+
+ // Scan the function body for instructions that may read or write memory.
+ for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
+ Instruction *I = &*II;
+
+ // Some instructions can be ignored even if they read or write memory.
+ // Detect these now, skipping to the next instruction if one is found.
+ CallSite CS(cast<Value>(I));
+ if (CS && CS.getCalledFunction()) {
+ // Ignore calls to functions in the same SCC.
+ if (SCCNodes.count(CS.getCalledFunction()))
+ continue;
+ // Ignore intrinsics that only access local memory.
+ if (unsigned id = CS.getCalledFunction()->getIntrinsicID())
+ if (AliasAnalysis::getIntrinsicModRefBehavior(id) ==
+ AliasAnalysis::AccessesArguments) {
+ // Check that all pointer arguments point to local memory.
+ for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI) {
+ Value *Arg = *CI;
+ if (Arg->getType()->isPointerTy() && !PointsToLocalMemory(Arg))
+ // Writes memory. Just give up.
+ return false;
+ }
+ // Only reads and writes local memory.
+ continue;
+ }
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ // Ignore loads from local memory.
+ if (PointsToLocalMemory(LI->getPointerOperand()))
+ continue;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Ignore stores to local memory.
+ if (PointsToLocalMemory(SI->getPointerOperand()))
+ continue;
+ }
+
+ // Any remaining instructions need to be taken seriously! Check if they
+ // read or write memory.
+ if (I->mayWriteToMemory())
+ // Writes memory. Just give up.
+ return false;
+
+ if (isMalloc(I))
+ // malloc claims not to write memory! PR3754.
+ return false;
+
+ // If this instruction may read memory, remember that.
+ ReadsMemory |= I->mayReadFromMemory();
+ }
+ }
+
+ // Success! Functions in this SCC do not access memory, or only read memory.
+ // Give them the appropriate attribute.
+ bool MadeChange = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F->doesNotAccessMemory())
+ // Already perfect!
+ continue;
+
+ if (F->onlyReadsMemory() && ReadsMemory)
+ // No change.
+ continue;
+
+ MadeChange = true;
+
+ // Clear out any existing attributes.
+ F->removeAttribute(~0, Attribute::ReadOnly | Attribute::ReadNone);
+
+ // Add in the new attribute.
+ F->addAttribute(~0, ReadsMemory? Attribute::ReadOnly : Attribute::ReadNone);
+
+ if (ReadsMemory)
+ ++NumReadOnly;
+ else
+ ++NumReadNone;
+ }
+
+ return MadeChange;
+}
+
+/// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
+bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
+ bool Changed = false;
+
+ // Check each function in turn, determining which pointer arguments are not
+ // captured.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F == 0)
+ // External node - skip it;
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that writes memory, so treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ continue;
+
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A)
+ if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr() &&
+ !PointerMayBeCaptured(A, true, /*StoreCaptures=*/false)) {
+ A->addAttr(Attribute::NoCapture);
+ ++NumNoCapture;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// IsFunctionMallocLike - A function is malloc-like if it returns either null
+/// or a pointer that doesn't alias any other pointer visible to the caller.
+bool FunctionAttrs::IsFunctionMallocLike(Function *F,
+ SmallPtrSet<Function*, 8> &SCCNodes) const {
+ UniqueVector<Value *> FlowsToReturn;
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
+ FlowsToReturn.insert(Ret->getReturnValue());
+
+ for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
+ Value *RetVal = FlowsToReturn[i+1]; // UniqueVector[0] is reserved.
+
+ if (Constant *C = dyn_cast<Constant>(RetVal)) {
+ if (!C->isNullValue() && !isa<UndefValue>(C))
+ return false;
+
+ continue;
+ }
+
+ if (isa<Argument>(RetVal))
+ return false;
+
+ if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
+ switch (RVI->getOpcode()) {
+ // Extend the analysis by looking upwards.
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ FlowsToReturn.insert(RVI->getOperand(0));
+ continue;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(RVI);
+ FlowsToReturn.insert(SI->getTrueValue());
+ FlowsToReturn.insert(SI->getFalseValue());
+ continue;
+ }
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(RVI);
+ for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ FlowsToReturn.insert(PN->getIncomingValue(i));
+ continue;
+ }
+
+ // Check whether the pointer came from an allocation.
+ case Instruction::Alloca:
+ break;
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(RVI);
+ if (CS.paramHasAttr(0, Attribute::NoAlias))
+ break;
+ if (CS.getCalledFunction() &&
+ SCCNodes.count(CS.getCalledFunction()))
+ break;
+ } // fall-through
+ default:
+ return false; // Did not come from an allocation.
+ }
+
+ if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
+ return false;
+ }
+
+ return true;
+}
+
+/// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
+bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
+ SmallPtrSet<Function*, 8> SCCNodes;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert((*I)->getFunction());
+
+ // Check each function in turn, determining which functions return noalias
+ // pointers.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F == 0)
+ // External node - skip it;
+ return false;
+
+ // Already noalias.
+ if (F->doesNotAlias(0))
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime, so
+ // treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ return false;
+
+ // We annotate noalias return values, which are only applicable to
+ // pointer types.
+ if (!F->getReturnType()->isPointerTy())
+ continue;
+
+ if (!IsFunctionMallocLike(F, SCCNodes))
+ return false;
+ }
+
+ bool MadeChange = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
+ continue;
+
+ F->setDoesNotAlias(0);
+ ++NumNoAlias;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+ bool Changed = AddReadAttrs(SCC);
+ Changed |= AddNoCaptureAttrs(SCC);
+ Changed |= AddNoAliasAttrs(SCC);
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
new file mode 100644
index 0000000..aa18601
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -0,0 +1,209 @@
+//===-- GlobalDCE.cpp - DCE unreachable internal functions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform is designed to eliminate unreachable internal globals from the
+// program. It uses an aggressive algorithm, searching out globals that are
+// known to be alive. After it finds all of the globals which are needed, it
+// deletes whatever is left over. This allows it to delete recursive chunks of
+// the program which are unreachable.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globaldce"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumAliases , "Number of global aliases removed");
+STATISTIC(NumFunctions, "Number of functions removed");
+STATISTIC(NumVariables, "Number of global variables removed");
+
+namespace {
+ struct GlobalDCE : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ GlobalDCE() : ModulePass(ID) {}
+
+ // run - Do the GlobalDCE pass on the specified module, optionally updating
+ // the specified callgraph to reflect the changes.
+ //
+ bool runOnModule(Module &M);
+
+ private:
+ SmallPtrSet<GlobalValue*, 32> AliveGlobals;
+
+ /// GlobalIsNeeded - mark the specific global value as needed, and
+ /// recursively mark anything that it uses as also needed.
+ void GlobalIsNeeded(GlobalValue *GV);
+ void MarkUsedGlobalsAsNeeded(Constant *C);
+
+ bool RemoveUnusedGlobalValue(GlobalValue &GV);
+ };
+}
+
+char GlobalDCE::ID = 0;
+INITIALIZE_PASS(GlobalDCE, "globaldce",
+ "Dead Global Elimination", false, false);
+
+ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
+
+bool GlobalDCE::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Loop over the module, adding globals which are obviously necessary.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Functions with external linkage are needed if they have a body
+ if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() &&
+ !I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ GlobalIsNeeded(I);
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Externally visible & appending globals are needed, if they have an
+ // initializer.
+ if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() &&
+ !I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ GlobalIsNeeded(I);
+ }
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I) {
+ Changed |= RemoveUnusedGlobalValue(*I);
+ // Externally visible aliases are needed.
+ if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage())
+ GlobalIsNeeded(I);
+ }
+
+ // Now that all globals which are needed are in the AliveGlobals set, we loop
+ // through the program, deleting those which are not alive.
+ //
+
+ // The first pass is to drop initializers of global variables which are dead.
+ std::vector<GlobalVariable*> DeadGlobalVars; // Keep track of dead globals
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadGlobalVars.push_back(I); // Keep track of dead globals
+ I->setInitializer(0);
+ }
+
+ // The second pass drops the bodies of functions which are dead...
+ std::vector<Function*> DeadFunctions;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadFunctions.push_back(I); // Keep track of dead globals
+ if (!I->isDeclaration())
+ I->deleteBody();
+ }
+
+ // The third pass drops targets of aliases which are dead...
+ std::vector<GlobalAlias*> DeadAliases;
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E;
+ ++I)
+ if (!AliveGlobals.count(I)) {
+ DeadAliases.push_back(I);
+ I->setAliasee(0);
+ }
+
+ if (!DeadFunctions.empty()) {
+ // Now that all interferences have been dropped, delete the actual objects
+ // themselves.
+ for (unsigned i = 0, e = DeadFunctions.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadFunctions[i]);
+ M.getFunctionList().erase(DeadFunctions[i]);
+ }
+ NumFunctions += DeadFunctions.size();
+ Changed = true;
+ }
+
+ if (!DeadGlobalVars.empty()) {
+ for (unsigned i = 0, e = DeadGlobalVars.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadGlobalVars[i]);
+ M.getGlobalList().erase(DeadGlobalVars[i]);
+ }
+ NumVariables += DeadGlobalVars.size();
+ Changed = true;
+ }
+
+ // Now delete any dead aliases.
+ if (!DeadAliases.empty()) {
+ for (unsigned i = 0, e = DeadAliases.size(); i != e; ++i) {
+ RemoveUnusedGlobalValue(*DeadAliases[i]);
+ M.getAliasList().erase(DeadAliases[i]);
+ }
+ NumAliases += DeadAliases.size();
+ Changed = true;
+ }
+
+ // Make sure that all memory is released
+ AliveGlobals.clear();
+
+ return Changed;
+}
+
+/// GlobalIsNeeded - the specific global value as needed, and
+/// recursively mark anything that it uses as also needed.
+void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
+ // If the global is already in the set, no need to reprocess it.
+ if (!AliveGlobals.insert(G))
+ return;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
+ // If this is a global variable, we must make sure to add any global values
+ // referenced by the initializer to the alive set.
+ if (GV->hasInitializer())
+ MarkUsedGlobalsAsNeeded(GV->getInitializer());
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(G)) {
+ // The target of a global alias is needed.
+ MarkUsedGlobalsAsNeeded(GA->getAliasee());
+ } else {
+ // Otherwise this must be a function object. We have to scan the body of
+ // the function looking for constants and global values which are used as
+ // operands. Any operands of these types must be processed to ensure that
+ // any globals used will be marked as needed.
+ Function *F = cast<Function>(G);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(*U))
+ GlobalIsNeeded(GV);
+ else if (Constant *C = dyn_cast<Constant>(*U))
+ MarkUsedGlobalsAsNeeded(C);
+ }
+}
+
+void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return GlobalIsNeeded(GV);
+
+ // Loop over all of the operands of the constant, adding any globals they
+ // use to the list of needed globals.
+ for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
+ if (Constant *OpC = dyn_cast<Constant>(*I))
+ MarkUsedGlobalsAsNeeded(OpC);
+}
+
+// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
+// GlobalValue, looking for the constant pointer ref that may be pointing to it.
+// If found, check to see if the constant pointer ref is safe to destroy, and if
+// so, nuke it. This will reduce the reference count on the global value, which
+// might make it deader.
+//
+bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
+ if (GV.use_empty()) return false;
+ GV.removeDeadConstantUsers();
+ return GV.use_empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
new file mode 100644
index 0000000..a77af54
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -0,0 +1,2576 @@
+//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms simple global variables that never have their address
+// taken. If obviously true, it marks read/write globals as constant, deletes
+// variables only stored to, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globalopt"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumMarked , "Number of globals marked constant");
+STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
+STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
+STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
+STATISTIC(NumDeleted , "Number of globals deleted");
+STATISTIC(NumFnDeleted , "Number of functions deleted");
+STATISTIC(NumGlobUses , "Number of global uses devirtualized");
+STATISTIC(NumLocalized , "Number of globals localized");
+STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans");
+STATISTIC(NumFastCallFns , "Number of functions converted to fastcc");
+STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
+STATISTIC(NumNestRemoved , "Number of nest attributes removed");
+STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
+STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
+
+namespace {
+ struct GlobalOpt : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ }
+ static char ID; // Pass identification, replacement for typeid
+ GlobalOpt() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M);
+
+ private:
+ GlobalVariable *FindGlobalCtors(Module &M);
+ bool OptimizeFunctions(Module &M);
+ bool OptimizeGlobalVars(Module &M);
+ bool OptimizeGlobalAliases(Module &M);
+ bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
+ bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
+ };
+}
+
+char GlobalOpt::ID = 0;
+INITIALIZE_PASS(GlobalOpt, "globalopt",
+ "Global Variable Optimizer", false, false);
+
+ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
+
+namespace {
+
+/// GlobalStatus - As we analyze each global, keep track of some information
+/// about it. If we find out that the address of the global is taken, none of
+/// this info will be accurate.
+struct GlobalStatus {
+ /// isLoaded - True if the global is ever loaded. If the global isn't ever
+ /// loaded it can be deleted.
+ bool isLoaded;
+
+ /// StoredType - Keep track of what stores to the global look like.
+ ///
+ enum StoredType {
+ /// NotStored - There is no store to this global. It can thus be marked
+ /// constant.
+ NotStored,
+
+ /// isInitializerStored - This global is stored to, but the only thing
+ /// stored is the constant it was initialized with. This is only tracked
+ /// for scalar globals.
+ isInitializerStored,
+
+ /// isStoredOnce - This global is stored to, but only its initializer and
+ /// one other value is ever stored to it. If this global isStoredOnce, we
+ /// track the value stored to it in StoredOnceValue below. This is only
+ /// tracked for scalar globals.
+ isStoredOnce,
+
+ /// isStored - This global is stored to by multiple values or something else
+ /// that we cannot track.
+ isStored
+ } StoredType;
+
+ /// StoredOnceValue - If only one value (besides the initializer constant) is
+ /// ever stored to this global, keep track of what value it is.
+ Value *StoredOnceValue;
+
+ /// AccessingFunction/HasMultipleAccessingFunctions - These start out
+ /// null/false. When the first accessing function is noticed, it is recorded.
+ /// When a second different accessing function is noticed,
+ /// HasMultipleAccessingFunctions is set to true.
+ const Function *AccessingFunction;
+ bool HasMultipleAccessingFunctions;
+
+ /// HasNonInstructionUser - Set to true if this global has a user that is not
+ /// an instruction (e.g. a constant expr or GV initializer).
+ bool HasNonInstructionUser;
+
+ /// HasPHIUser - Set to true if this global has a user that is a PHI node.
+ bool HasPHIUser;
+
+ GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0),
+ AccessingFunction(0), HasMultipleAccessingFunctions(false),
+ HasNonInstructionUser(false), HasPHIUser(false) {}
+};
+
+}
+
+// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
+// by constants itself. Note that constants cannot be cyclic, so this test is
+// pretty easy to implement recursively.
+//
+static bool SafeToDestroyConstant(const Constant *C) {
+ if (isa<GlobalValue>(C)) return false;
+
+ for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
+ ++UI)
+ if (const Constant *CU = dyn_cast<Constant>(*UI)) {
+ if (!SafeToDestroyConstant(CU)) return false;
+ } else
+ return false;
+ return true;
+}
+
+
+/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus
+/// structure. If the global has its address taken, return true to indicate we
+/// can't do anything with it.
+///
+static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
+ SmallPtrSet<const PHINode*, 16> &PHIUsers) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const User *U = *UI;
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ GS.HasNonInstructionUser = true;
+ if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
+ } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
+ if (!GS.HasMultipleAccessingFunctions) {
+ const Function *F = I->getParent()->getParent();
+ if (GS.AccessingFunction == 0)
+ GS.AccessingFunction = F;
+ else if (GS.AccessingFunction != F)
+ GS.HasMultipleAccessingFunctions = true;
+ }
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ GS.isLoaded = true;
+ if (LI->isVolatile()) return true; // Don't hack on volatile loads.
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Don't allow a store OF the address, only stores TO the address.
+ if (SI->getOperand(0) == V) return true;
+
+ if (SI->isVolatile()) return true; // Don't hack on volatile stores.
+
+ // If this is a direct store to the global (i.e., the global is a scalar
+ // value, not an aggregate), keep more specific information about
+ // stores.
+ if (GS.StoredType != GlobalStatus::isStored) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
+ SI->getOperand(1))) {
+ Value *StoredVal = SI->getOperand(0);
+ if (StoredVal == GV->getInitializer()) {
+ if (GS.StoredType < GlobalStatus::isInitializerStored)
+ GS.StoredType = GlobalStatus::isInitializerStored;
+ } else if (isa<LoadInst>(StoredVal) &&
+ cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
+ if (GS.StoredType < GlobalStatus::isInitializerStored)
+ GS.StoredType = GlobalStatus::isInitializerStored;
+ } else if (GS.StoredType < GlobalStatus::isStoredOnce) {
+ GS.StoredType = GlobalStatus::isStoredOnce;
+ GS.StoredOnceValue = StoredVal;
+ } else if (GS.StoredType == GlobalStatus::isStoredOnce &&
+ GS.StoredOnceValue == StoredVal) {
+ // noop.
+ } else {
+ GS.StoredType = GlobalStatus::isStored;
+ }
+ } else {
+ GS.StoredType = GlobalStatus::isStored;
+ }
+ }
+ } else if (isa<GetElementPtrInst>(I)) {
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ } else if (isa<SelectInst>(I)) {
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
+ // PHI nodes we can check just like select or GEP instructions, but we
+ // have to be careful about infinite recursion.
+ if (PHIUsers.insert(PN)) // Not already visited.
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
+ GS.HasPHIUser = true;
+ } else if (isa<CmpInst>(I)) {
+ // Nothing to analyse.
+ } else if (isa<MemTransferInst>(I)) {
+ const MemTransferInst *MTI = cast<MemTransferInst>(I);
+ if (MTI->getArgOperand(0) == V)
+ GS.StoredType = GlobalStatus::isStored;
+ if (MTI->getArgOperand(1) == V)
+ GS.isLoaded = true;
+ } else if (isa<MemSetInst>(I)) {
+ assert(cast<MemSetInst>(I)->getArgOperand(0) == V &&
+ "Memset only takes one pointer!");
+ GS.StoredType = GlobalStatus::isStored;
+ } else {
+ return true; // Any other non-load instruction might take address!
+ }
+ } else if (const Constant *C = dyn_cast<Constant>(U)) {
+ GS.HasNonInstructionUser = true;
+ // We might have a dead and dangling constant hanging off of here.
+ if (!SafeToDestroyConstant(C))
+ return true;
+ } else {
+ GS.HasNonInstructionUser = true;
+ // Otherwise must be some other user.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
+ if (!CI) return 0;
+ unsigned IdxV = CI->getZExtValue();
+
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) {
+ if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV);
+ } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) {
+ if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV);
+ } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) {
+ if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV);
+ } else if (isa<ConstantAggregateZero>(Agg)) {
+ if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) {
+ if (IdxV < STy->getNumElements())
+ return Constant::getNullValue(STy->getElementType(IdxV));
+ } else if (const SequentialType *STy =
+ dyn_cast<SequentialType>(Agg->getType())) {
+ return Constant::getNullValue(STy->getElementType());
+ }
+ } else if (isa<UndefValue>(Agg)) {
+ if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) {
+ if (IdxV < STy->getNumElements())
+ return UndefValue::get(STy->getElementType(IdxV));
+ } else if (const SequentialType *STy =
+ dyn_cast<SequentialType>(Agg->getType())) {
+ return UndefValue::get(STy->getElementType());
+ }
+ }
+ return 0;
+}
+
+
+/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all
+/// users of the global, cleaning up the obvious ones. This is largely just a
+/// quick scan over the use list to clean up the easy and obvious cruft. This
+/// returns true if it made a change.
+static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
+ bool Changed = false;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
+ User *U = *UI++;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (Init) {
+ // Replace the load with the initializer.
+ LI->replaceAllUsesWith(Init);
+ LI->eraseFromParent();
+ Changed = true;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // Store must be unreachable or storing Init into the global.
+ SI->eraseFromParent();
+ Changed = true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Constant *SubInit = 0;
+ if (Init)
+ SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit);
+ } else if (CE->getOpcode() == Instruction::BitCast &&
+ CE->getType()->isPointerTy()) {
+ // Pointer cast, delete any stores and memsets to the global.
+ Changed |= CleanupConstantGlobalUsers(CE, 0);
+ }
+
+ if (CE->use_empty()) {
+ CE->destroyConstant();
+ Changed = true;
+ }
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // Do not transform "gepinst (gep constexpr (GV))" here, because forming
+ // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
+ // and will invalidate our notion of what Init is.
+ Constant *SubInit = 0;
+ if (!isa<ConstantExpr>(GEP->getOperand(0))) {
+ ConstantExpr *CE =
+ dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
+ if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
+ SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+ }
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit);
+
+ if (GEP->use_empty()) {
+ GEP->eraseFromParent();
+ Changed = true;
+ }
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
+ if (MI->getRawDest() == V) {
+ MI->eraseFromParent();
+ Changed = true;
+ }
+
+ } else if (Constant *C = dyn_cast<Constant>(U)) {
+ // If we have a chain of dead constantexprs or other things dangling from
+ // us, and if they are all dead, nuke them without remorse.
+ if (SafeToDestroyConstant(C)) {
+ C->destroyConstant();
+ // This could have invalidated UI, start over from scratch.
+ CleanupConstantGlobalUsers(V, Init);
+ return true;
+ }
+ }
+ }
+ return Changed;
+}
+
+/// isSafeSROAElementUse - Return true if the specified instruction is a safe
+/// user of a derived expression from a global that we want to SROA.
+static bool isSafeSROAElementUse(Value *V) {
+ // We might have a dead and dangling constant hanging off of here.
+ if (Constant *C = dyn_cast<Constant>(V))
+ return SafeToDestroyConstant(C);
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Loads are ok.
+ if (isa<LoadInst>(I)) return true;
+
+ // Stores *to* the pointer are ok.
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getOperand(0) != V;
+
+ // Otherwise, it must be a GEP.
+ GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
+ if (GEPI == 0) return false;
+
+ if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
+ !cast<Constant>(GEPI->getOperand(1))->isNullValue())
+ return false;
+
+ for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
+ I != E; ++I)
+ if (!isSafeSROAElementUse(*I))
+ return false;
+ return true;
+}
+
+
+/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
+/// Look at it and its uses and decide whether it is safe to SROA this global.
+///
+static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
+ // The user of the global must be a GEP Inst or a ConstantExpr GEP.
+ if (!isa<GetElementPtrInst>(U) &&
+ (!isa<ConstantExpr>(U) ||
+ cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
+ return false;
+
+ // Check to see if this ConstantExpr GEP is SRA'able. In particular, we
+ // don't like < 3 operand CE's, and we don't like non-constant integer
+ // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
+ // value of C.
+ if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
+ !cast<Constant>(U->getOperand(1))->isNullValue() ||
+ !isa<ConstantInt>(U->getOperand(2)))
+ return false;
+
+ gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
+ ++GEPI; // Skip over the pointer index.
+
+ // If this is a use of an array allocation, do a bit more checking for sanity.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
+ uint64_t NumElements = AT->getNumElements();
+ ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
+
+ // Check to make sure that index falls within the array. If not,
+ // something funny is going on, so we won't do the optimization.
+ //
+ if (Idx->getZExtValue() >= NumElements)
+ return false;
+
+ // We cannot scalar repl this level of the array unless any array
+ // sub-indices are in-range constants. In particular, consider:
+ // A[0][i]. We cannot know that the user isn't doing invalid things like
+ // allowing i to index an out-of-range subscript that accesses A[1].
+ //
+ // Scalar replacing *just* the outer index of the array is probably not
+ // going to be a win anyway, so just give up.
+ for (++GEPI; // Skip array index.
+ GEPI != E;
+ ++GEPI) {
+ uint64_t NumElements;
+ if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
+ NumElements = SubArrayTy->getNumElements();
+ else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
+ NumElements = SubVectorTy->getNumElements();
+ else {
+ assert((*GEPI)->isStructTy() &&
+ "Indexed GEP type is not array, vector, or struct!");
+ continue;
+ }
+
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
+ if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
+ return false;
+ }
+ }
+
+ for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I)
+ if (!isSafeSROAElementUse(*I))
+ return false;
+ return true;
+}
+
+/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
+/// is safe for us to perform this transformation.
+///
+static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ if (!IsUserOfGlobalSafeForSRA(*UI, GV))
+ return false;
+ }
+ return true;
+}
+
+
+/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
+/// variable. This opens the door for other optimizations by exposing the
+/// behavior of the program in a more fine-grained way. We have determined that
+/// this transformation is safe already. We return the first global variable we
+/// insert so that the caller can reprocess it.
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
+ // Make sure this global only has simple uses that we can SRA.
+ if (!GlobalUsersSafeToSRA(GV))
+ return 0;
+
+ assert(GV->hasLocalLinkage() && !GV->isConstant());
+ Constant *Init = GV->getInitializer();
+ const Type *Ty = Init->getType();
+
+ std::vector<GlobalVariable*> NewGlobals;
+ Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
+
+ // Get the alignment of the global, either explicit or target-specific.
+ unsigned StartAlignment = GV->getAlignment();
+ if (StartAlignment == 0)
+ StartAlignment = TD.getABITypeAlignment(GV->getType());
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ NewGlobals.reserve(STy->getNumElements());
+ const StructLayout &Layout = *TD.getStructLayout(STy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Constant *In = getAggregateConstantElement(Init,
+ ConstantInt::get(Type::getInt32Ty(STy->getContext()), i));
+ assert(In && "Couldn't get element of initializer?");
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
+ GlobalVariable::InternalLinkage,
+ In, GV->getName()+"."+Twine(i),
+ GV->isThreadLocal(),
+ GV->getType()->getAddressSpace());
+ Globals.insert(GV, NGV);
+ NewGlobals.push_back(NGV);
+
+ // Calculate the known alignment of the field. If the original aggregate
+ // had 256 byte alignment for example, something might depend on that:
+ // propagate info to each field.
+ uint64_t FieldOffset = Layout.getElementOffset(i);
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
+ if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
+ NGV->setAlignment(NewAlign);
+ }
+ } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
+ unsigned NumElements = 0;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
+ NumElements = ATy->getNumElements();
+ else
+ NumElements = cast<VectorType>(STy)->getNumElements();
+
+ if (NumElements > 16 && GV->hasNUsesOrMore(16))
+ return 0; // It's not worth it.
+ NewGlobals.reserve(NumElements);
+
+ uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
+ unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
+ Constant *In = getAggregateConstantElement(Init,
+ ConstantInt::get(Type::getInt32Ty(Init->getContext()), i));
+ assert(In && "Couldn't get element of initializer?");
+
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
+ GlobalVariable::InternalLinkage,
+ In, GV->getName()+"."+Twine(i),
+ GV->isThreadLocal(),
+ GV->getType()->getAddressSpace());
+ Globals.insert(GV, NGV);
+ NewGlobals.push_back(NGV);
+
+ // Calculate the known alignment of the field. If the original aggregate
+ // had 256 byte alignment for example, something might depend on that:
+ // propagate info to each field.
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
+ if (NewAlign > EltAlign)
+ NGV->setAlignment(NewAlign);
+ }
+ }
+
+ if (NewGlobals.empty())
+ return 0;
+
+ DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
+
+ Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
+
+ // Loop over all of the uses of the global, replacing the constantexpr geps,
+ // with smaller constantexpr geps or direct references.
+ while (!GV->use_empty()) {
+ User *GEP = GV->use_back();
+ assert(((isa<ConstantExpr>(GEP) &&
+ cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
+ isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");
+
+ // Ignore the 1th operand, which has to be zero or else the program is quite
+ // broken (undefined). Get the 2nd operand, which is the structure or array
+ // index.
+ unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
+ if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
+
+ Value *NewPtr = NewGlobals[Val];
+
+ // Form a shorter GEP if needed.
+ if (GEP->getNumOperands() > 3) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
+ SmallVector<Constant*, 8> Idxs;
+ Idxs.push_back(NullInt);
+ for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
+ Idxs.push_back(CE->getOperand(i));
+ NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr),
+ &Idxs[0], Idxs.size());
+ } else {
+ GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
+ SmallVector<Value*, 8> Idxs;
+ Idxs.push_back(NullInt);
+ for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
+ Idxs.push_back(GEPI->getOperand(i));
+ NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(),
+ GEPI->getName()+"."+Twine(Val),GEPI);
+ }
+ }
+ GEP->replaceAllUsesWith(NewPtr);
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
+ GEPI->eraseFromParent();
+ else
+ cast<ConstantExpr>(GEP)->destroyConstant();
+ }
+
+ // Delete the old global, now that it is dead.
+ Globals.erase(GV);
+ ++NumSRA;
+
+ // Loop over the new globals array deleting any globals that are obviously
+ // dead. This can arise due to scalarization of a structure or an array that
+ // has elements that are dead.
+ unsigned FirstGlobal = 0;
+ for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
+ if (NewGlobals[i]->use_empty()) {
+ Globals.erase(NewGlobals[i]);
+ if (FirstGlobal == i) ++FirstGlobal;
+ }
+
+ return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
+}
+
+/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
+/// value will trap if the value is dynamically null. PHIs keeps track of any
+/// phi nodes we've seen to avoid reprocessing them.
+static bool AllUsesOfValueWillTrapIfNull(const Value *V,
+ SmallPtrSet<const PHINode*, 8> &PHIs) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const User *U = *UI;
+
+ if (isa<LoadInst>(U)) {
+ // Will trap.
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Storing the value.
+ }
+ } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
+ if (CI->getCalledValue() != V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Not calling the ptr
+ }
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+ if (II->getCalledValue() != V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Not calling the ptr
+ }
+ } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
+ if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
+ // If we've already seen this phi node, ignore it, it has already been
+ // checked.
+ if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
+ return false;
+ } else if (isa<ICmpInst>(U) &&
+ isa<ConstantPointerNull>(UI->getOperand(1))) {
+ // Ignore icmp X, null
+ } else {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false;
+ }
+ }
+ return true;
+}
+
+/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
+/// from GV will trap if the loaded value is null. Note that this also permits
+/// comparisons of the loaded value against null, as a special case.
+static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ SmallPtrSet<const PHINode*, 8> PHIs;
+ if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
+ return false;
+ } else if (isa<StoreInst>(U)) {
+ // Ignore stores to the global.
+ } else {
+ // We don't know or understand this user, bail out.
+ //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
+ bool Changed = false;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
+ Instruction *I = cast<Instruction>(*UI++);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ LI->setOperand(0, NewV);
+ Changed = true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getOperand(1) == V) {
+ SI->setOperand(1, NewV);
+ Changed = true;
+ }
+ } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ CallSite CS(I);
+ if (CS.getCalledValue() == V) {
+ // Calling through the pointer! Turn into a direct call, but be careful
+ // that the pointer is not also being passed as an argument.
+ CS.setCalledFunction(NewV);
+ Changed = true;
+ bool PassedAsArg = false;
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.getArgument(i) == V) {
+ PassedAsArg = true;
+ CS.setArgument(i, NewV);
+ }
+
+ if (PassedAsArg) {
+ // Being passed as an argument also. Be careful to not invalidate UI!
+ UI = V->use_begin();
+ }
+ }
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ Changed |= OptimizeAwayTrappingUsesOfValue(CI,
+ ConstantExpr::getCast(CI->getOpcode(),
+ NewV, CI->getType()));
+ if (CI->use_empty()) {
+ Changed = true;
+ CI->eraseFromParent();
+ }
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ // Should handle GEP here.
+ SmallVector<Constant*, 8> Idxs;
+ Idxs.reserve(GEPI->getNumOperands()-1);
+ for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
+ i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(*i))
+ Idxs.push_back(C);
+ else
+ break;
+ if (Idxs.size() == GEPI->getNumOperands()-1)
+ Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
+ ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
+ Idxs.size()));
+ if (GEPI->use_empty()) {
+ Changed = true;
+ GEPI->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+}
+
+
+/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
+/// value stored into it. If there are uses of the loaded value that would trap
+/// if the loaded value is dynamically null, then we know that they cannot be
+/// reachable with a null optimize away the load.
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
+ bool Changed = false;
+
+ // Keep track of whether we are able to remove all the uses of the global
+ // other than the store that defines it.
+ bool AllNonStoreUsesGone = true;
+
+ // Replace all uses of loads with uses of uses of the stored value.
+ for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
+ User *GlobalUser = *GUI++;
+ if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
+ Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
+ // If we were able to delete all uses of the loads
+ if (LI->use_empty()) {
+ LI->eraseFromParent();
+ Changed = true;
+ } else {
+ AllNonStoreUsesGone = false;
+ }
+ } else if (isa<StoreInst>(GlobalUser)) {
+ // Ignore the store that stores "LV" to the global.
+ assert(GlobalUser->getOperand(1) == GV &&
+ "Must be storing *to* the global");
+ } else {
+ AllNonStoreUsesGone = false;
+
+ // If we get here we could have other crazy uses that are transitively
+ // loaded.
+ assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
+ isa<ConstantExpr>(GlobalUser)) && "Only expect load and stores!");
+ }
+ }
+
+ if (Changed) {
+ DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
+ ++NumGlobUses;
+ }
+
+ // If we nuked all of the loads, then none of the stores are needed either,
+ // nor is the global.
+ if (AllNonStoreUsesGone) {
+ DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
+ CleanupConstantGlobalUsers(GV, 0);
+ if (GV->use_empty()) {
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+ Changed = true;
+ }
+ return Changed;
+}
+
+/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
+/// instructions that are foldable.
+static void ConstantPropUsersOf(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
+ if (Instruction *I = dyn_cast<Instruction>(*UI++))
+ if (Constant *NewC = ConstantFoldInstruction(I)) {
+ I->replaceAllUsesWith(NewC);
+
+ // Advance UI to the next non-I use to avoid invalidating it!
+ // Instructions could multiply use V.
+ while (UI != E && *UI == I)
+ ++UI;
+ I->eraseFromParent();
+ }
+}
+
+/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
+/// variable, and transforms the program as if it always contained the result of
+/// the specified malloc. Because it is always the result of the specified
+/// malloc, there is no reason to actually DO the malloc. Instead, turn the
+/// malloc into a global, and any loads of GV as uses of the new global.
+static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
+ CallInst *CI,
+ const Type *AllocTy,
+ ConstantInt *NElements,
+ TargetData* TD) {
+ DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
+
+ const Type *GlobalType;
+ if (NElements->getZExtValue() == 1)
+ GlobalType = AllocTy;
+ else
+ // If we have an array allocation, the global variable is of an array.
+ GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
+
+ // Create the new global variable. The contents of the malloc'd memory is
+ // undefined, so initialize with an undef value.
+ GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
+ GlobalType, false,
+ GlobalValue::InternalLinkage,
+ UndefValue::get(GlobalType),
+ GV->getName()+".body",
+ GV,
+ GV->isThreadLocal());
+
+ // If there are bitcast users of the malloc (which is typical, usually we have
+ // a malloc + bitcast) then replace them with uses of the new global. Update
+ // other users to use the global as well.
+ BitCastInst *TheBC = 0;
+ while (!CI->use_empty()) {
+ Instruction *User = cast<Instruction>(CI->use_back());
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ if (BCI->getType() == NewGV->getType()) {
+ BCI->replaceAllUsesWith(NewGV);
+ BCI->eraseFromParent();
+ } else {
+ BCI->setOperand(0, NewGV);
+ }
+ } else {
+ if (TheBC == 0)
+ TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
+ User->replaceUsesOfWith(CI, TheBC);
+ }
+ }
+
+ Constant *RepValue = NewGV;
+ if (NewGV->getType() != GV->getType()->getElementType())
+ RepValue = ConstantExpr::getBitCast(RepValue,
+ GV->getType()->getElementType());
+
+ // If there is a comparison against null, we will insert a global bool to
+ // keep track of whether the global was initialized yet or not.
+ GlobalVariable *InitBool =
+ new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(GV->getContext()),
+ GV->getName()+".init", GV->isThreadLocal());
+ bool InitBoolUsed = false;
+
+ // Loop over all uses of GV, processing them in turn.
+ while (!GV->use_empty()) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
+ // The global is initialized when the store to it occurs.
+ new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI);
+ SI->eraseFromParent();
+ continue;
+ }
+
+ LoadInst *LI = cast<LoadInst>(GV->use_back());
+ while (!LI->use_empty()) {
+ Use &LoadUse = LI->use_begin().getUse();
+ if (!isa<ICmpInst>(LoadUse.getUser())) {
+ LoadUse = RepValue;
+ continue;
+ }
+
+ ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
+ // Replace the cmp X, 0 with a use of the bool value.
+ Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
+ InitBoolUsed = true;
+ switch (ICI->getPredicate()) {
+ default: llvm_unreachable("Unknown ICmp Predicate!");
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: // X < null -> always false
+ LV = ConstantInt::getFalse(GV->getContext());
+ break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_EQ:
+ LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ break; // no change.
+ }
+ ICI->replaceAllUsesWith(LV);
+ ICI->eraseFromParent();
+ }
+ LI->eraseFromParent();
+ }
+
+ // If the initialization boolean was used, insert it, otherwise delete it.
+ if (!InitBoolUsed) {
+ while (!InitBool->use_empty()) // Delete initializations
+ cast<StoreInst>(InitBool->use_back())->eraseFromParent();
+ delete InitBool;
+ } else
+ GV->getParent()->getGlobalList().insert(GV, InitBool);
+
+ // Now the GV is dead, nuke it and the malloc..
+ GV->eraseFromParent();
+ CI->eraseFromParent();
+
+ // To further other optimizations, loop over all users of NewGV and try to
+ // constant prop them. This will promote GEP instructions with constant
+ // indices into GEP constant-exprs, which will allow global-opt to hack on it.
+ ConstantPropUsersOf(NewGV);
+ if (RepValue != NewGV)
+ ConstantPropUsersOf(RepValue);
+
+ return NewGV;
+}
+
+/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
+/// to make sure that there are no complex uses of V. We permit simple things
+/// like dereferencing the pointer, but not storing through the address, unless
+/// it is to the specified global.
+static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
+ const GlobalVariable *GV,
+ SmallPtrSet<const PHINode*, 8> &PHIs) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ const Instruction *Inst = cast<Instruction>(*UI);
+
+ if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
+ continue; // Fine, ignore.
+ }
+
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
+ return false; // Storing the pointer itself... bad.
+ continue; // Otherwise, storing through it, or storing into GV... fine.
+ }
+
+ // Must index into the array and into the struct.
+ if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
+ // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
+ // cycles.
+ if (PHIs.insert(PN))
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ return false;
+ }
+ return true;
+}
+
+/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
+/// somewhere. Transform all uses of the allocation into loads from the
+/// global and uses of the resultant pointer. Further, delete the store into
+/// GV. This assumes that these value pass the
+/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
+static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
+ GlobalVariable *GV) {
+ while (!Alloc->use_empty()) {
+ Instruction *U = cast<Instruction>(*Alloc->use_begin());
+ Instruction *InsertPt = U;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // If this is the store of the allocation into the global, remove it.
+ if (SI->getOperand(1) == GV) {
+ SI->eraseFromParent();
+ continue;
+ }
+ } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ // Insert the load in the corresponding predecessor, not right before the
+ // PHI.
+ InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator();
+ } else if (isa<BitCastInst>(U)) {
+ // Must be bitcast between the malloc and store to initialize the global.
+ ReplaceUsesOfMallocWithGlobal(U, GV);
+ U->eraseFromParent();
+ continue;
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ // If this is a "GEP bitcast" and the user is a store to the global, then
+ // just process it as a bitcast.
+ if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
+ if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back()))
+ if (SI->getOperand(1) == GV) {
+ // Must be bitcast GEP between the malloc and store to initialize
+ // the global.
+ ReplaceUsesOfMallocWithGlobal(GEPI, GV);
+ GEPI->eraseFromParent();
+ continue;
+ }
+ }
+
+ // Insert a load from the global, and use it instead of the malloc.
+ Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
+ U->replaceUsesOfWith(Alloc, NL);
+ }
+}
+
+/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
+/// of a load) are simple enough to perform heap SRA on. This permits GEP's
+/// that index through the array and struct field, icmps of null, and PHIs.
+static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
+ SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
+ SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
+ // We permit two users of the load: setcc comparing against the null
+ // pointer, and a getelementptr of a specific form.
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const Instruction *User = cast<Instruction>(*UI);
+
+ // Comparison against null is ok.
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return false;
+ continue;
+ }
+
+ // getelementptr is also ok, but only a simple form.
+ if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ // Must index into the array and into the struct.
+ if (GEPI->getNumOperands() < 3)
+ return false;
+
+ // Otherwise the GEP is ok.
+ continue;
+ }
+
+ if (const PHINode *PN = dyn_cast<PHINode>(User)) {
+ if (!LoadUsingPHIsPerLoad.insert(PN))
+ // This means some phi nodes are dependent on each other.
+ // Avoid infinite looping!
+ return false;
+ if (!LoadUsingPHIs.insert(PN))
+ // If we have already analyzed this PHI, then it is safe.
+ continue;
+
+ // Make sure all uses of the PHI are simple enough to transform.
+ if (!LoadUsesSimpleEnoughForHeapSRA(PN,
+ LoadUsingPHIs, LoadUsingPHIsPerLoad))
+ return false;
+
+ continue;
+ }
+
+ // Otherwise we don't know what this is, not ok.
+ return false;
+ }
+
+ return true;
+}
+
+
+/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
+/// GV are simple enough to perform HeapSRA, return true.
+static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
+ Instruction *StoredVal) {
+ SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
+ SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI)
+ if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
+ LoadUsingPHIsPerLoad))
+ return false;
+ LoadUsingPHIsPerLoad.clear();
+ }
+
+ // If we reach here, we know that all uses of the loads and transitive uses
+ // (through PHI nodes) are simple enough to transform. However, we don't know
+ // that all inputs the to the PHI nodes are in the same equivalence sets.
+ // Check to verify that all operands of the PHIs are either PHIS that can be
+ // transformed, loads from GV, or MI itself.
+ for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
+ , E = LoadUsingPHIs.end(); I != E; ++I) {
+ const PHINode *PN = *I;
+ for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
+ Value *InVal = PN->getIncomingValue(op);
+
+ // PHI of the stored value itself is ok.
+ if (InVal == StoredVal) continue;
+
+ if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
+ // One of the PHIs in our set is (optimistically) ok.
+ if (LoadUsingPHIs.count(InPN))
+ continue;
+ return false;
+ }
+
+ // Load from GV is ok.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
+ if (LI->getOperand(0) == GV)
+ continue;
+
+ // UNDEF? NULL?
+
+ // Anything else is rejected.
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
+
+ if (FieldNo >= FieldVals.size())
+ FieldVals.resize(FieldNo+1);
+
+ // If we already have this value, just reuse the previously scalarized
+ // version.
+ if (Value *FieldVal = FieldVals[FieldNo])
+ return FieldVal;
+
+ // Depending on what instruction this is, we have several cases.
+ Value *Result;
+ if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ // This is a scalarized version of the load from the global. Just create
+ // a new Load of the scalarized global.
+ Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
+ InsertedScalarizedValues,
+ PHIsToRewrite),
+ LI->getName()+".f"+Twine(FieldNo), LI);
+ } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ // PN's type is pointer to struct. Make a new PHI of pointer to struct
+ // field.
+ const StructType *ST =
+ cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
+
+ Result =
+ PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+ PN->getName()+".f"+Twine(FieldNo), PN);
+ PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
+ } else {
+ llvm_unreachable("Unknown usable value");
+ Result = 0;
+ }
+
+ return FieldVals[FieldNo] = Result;
+}
+
+/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
+/// the load, rewrite the derived value to use the HeapSRoA'd load.
+static void RewriteHeapSROALoadUser(Instruction *LoadUser,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ // If this is a comparison against null, handle it.
+ if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
+ assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
+ // If we have a setcc of the loaded pointer, we can use a setcc of any
+ // field.
+ Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
+ InsertedScalarizedValues, PHIsToRewrite);
+
+ Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
+ Constant::getNullValue(NPtr->getType()),
+ SCI->getName());
+ SCI->replaceAllUsesWith(New);
+ SCI->eraseFromParent();
+ return;
+ }
+
+ // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
+ assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
+ && "Unexpected GEPI!");
+
+ // Load the pointer for this field.
+ unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
+ Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
+ InsertedScalarizedValues, PHIsToRewrite);
+
+ // Create the new GEP idx vector.
+ SmallVector<Value*, 8> GEPIdx;
+ GEPIdx.push_back(GEPI->getOperand(1));
+ GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
+
+ Value *NGEPI = GetElementPtrInst::Create(NewPtr,
+ GEPIdx.begin(), GEPIdx.end(),
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NGEPI);
+ GEPI->eraseFromParent();
+ return;
+ }
+
+ // Recursively transform the users of PHI nodes. This will lazily create the
+ // PHIs that are needed for individual elements. Keep track of what PHIs we
+ // see in InsertedScalarizedValues so that we don't get infinite loops (very
+ // antisocial). If the PHI is already in InsertedScalarizedValues, it has
+ // already been seen first by another load, so its uses have already been
+ // processed.
+ PHINode *PN = cast<PHINode>(LoadUser);
+ bool Inserted;
+ DenseMap<Value*, std::vector<Value*> >::iterator InsertPos;
+ tie(InsertPos, Inserted) =
+ InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
+ if (!Inserted) return;
+
+ // If this is the first time we've seen this PHI, recursively process all
+ // users.
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+ }
+}
+
+/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr
+/// is a value loaded from the global. Eliminate all uses of Ptr, making them
+/// use FieldGlobals instead. All uses of loaded values satisfy
+/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
+static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
+ UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+ }
+
+ if (Load->use_empty()) {
+ Load->eraseFromParent();
+ InsertedScalarizedValues.erase(Load);
+ }
+}
+
+/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
+/// it up into multiple allocations of arrays of the fields.
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
+ Value* NElems, TargetData *TD) {
+ DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
+ const Type* MAT = getMallocAllocatedType(CI);
+ const StructType *STy = cast<StructType>(MAT);
+
+ // There is guaranteed to be at least one use of the malloc (storing
+ // it into GV). If there are other uses, change them to be uses of
+ // the global to simplify later code. This also deletes the store
+ // into GV.
+ ReplaceUsesOfMallocWithGlobal(CI, GV);
+
+ // Okay, at this point, there are no users of the malloc. Insert N
+ // new mallocs at the same place as CI, and N globals.
+ std::vector<Value*> FieldGlobals;
+ std::vector<Value*> FieldMallocs;
+
+ for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
+ const Type *FieldTy = STy->getElementType(FieldNo);
+ const PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
+
+ GlobalVariable *NGV =
+ new GlobalVariable(*GV->getParent(),
+ PFieldTy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(PFieldTy),
+ GV->getName() + ".f" + Twine(FieldNo), GV,
+ GV->isThreadLocal());
+ FieldGlobals.push_back(NGV);
+
+ unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
+ if (const StructType *ST = dyn_cast<StructType>(FieldTy))
+ TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
+ ConstantInt::get(IntPtrTy, TypeSize),
+ NElems, 0,
+ CI->getName() + ".f" + Twine(FieldNo));
+ FieldMallocs.push_back(NMI);
+ new StoreInst(NMI, NGV, CI);
+ }
+
+ // The tricky aspect of this transformation is handling the case when malloc
+ // fails. In the original code, malloc failing would set the result pointer
+ // of malloc to null. In this case, some mallocs could succeed and others
+ // could fail. As such, we emit code that looks like this:
+ // F0 = malloc(field0)
+ // F1 = malloc(field1)
+ // F2 = malloc(field2)
+ // if (F0 == 0 || F1 == 0 || F2 == 0) {
+ // if (F0) { free(F0); F0 = 0; }
+ // if (F1) { free(F1); F1 = 0; }
+ // if (F2) { free(F2); F2 = 0; }
+ // }
+ // The malloc can also fail if its argument is too large.
+ Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
+ Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
+ ConstantZero, "isneg");
+ for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
+ Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
+ Constant::getNullValue(FieldMallocs[i]->getType()),
+ "isnull");
+ RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
+ }
+
+ // Split the basic block at the old malloc.
+ BasicBlock *OrigBB = CI->getParent();
+ BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
+
+ // Create the block to check the first condition. Put all these blocks at the
+ // end of the function as they are unlikely to be executed.
+ BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
+ "malloc_ret_null",
+ OrigBB->getParent());
+
+ // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
+ // branch on RunningOr.
+ OrigBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
+
+ // Within the NullPtrBlock, we need to emit a comparison and branch for each
+ // pointer, because some may be null while others are not.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+ Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
+ Constant::getNullValue(GVVal->getType()),
+ "tmp");
+ BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
+ OrigBB->getParent());
+ BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
+ OrigBB->getParent());
+ Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
+ Cmp, NullPtrBlock);
+
+ // Fill in FreeBlock.
+ CallInst::CreateFree(GVVal, BI);
+ new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
+ FreeBlock);
+ BranchInst::Create(NextBlock, FreeBlock);
+
+ NullPtrBlock = NextBlock;
+ }
+
+ BranchInst::Create(ContBB, NullPtrBlock);
+
+ // CI is no longer needed, remove it.
+ CI->eraseFromParent();
+
+ /// InsertedScalarizedLoads - As we process loads, if we can't immediately
+ /// update all uses of the load, keep track of what scalarized loads are
+ /// inserted for a given load.
+ DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
+ InsertedScalarizedValues[GV] = FieldGlobals;
+
+ std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
+
+ // Okay, the malloc site is completely handled. All of the uses of GV are now
+ // loads, and all uses of those loads are simple. Rewrite them to use loads
+ // of the per-field globals instead.
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
+ continue;
+ }
+
+ // Must be a store of null.
+ StoreInst *SI = cast<StoreInst>(User);
+ assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
+ "Unexpected heap-sra user!");
+
+ // Insert a store of null into each global.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+ Constant *Null = Constant::getNullValue(PT->getElementType());
+ new StoreInst(Null, FieldGlobals[i], SI);
+ }
+ // Erase the original store.
+ SI->eraseFromParent();
+ }
+
+ // While we have PHIs that are interesting to rewrite, do it.
+ while (!PHIsToRewrite.empty()) {
+ PHINode *PN = PHIsToRewrite.back().first;
+ unsigned FieldNo = PHIsToRewrite.back().second;
+ PHIsToRewrite.pop_back();
+ PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+ assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
+
+ // Add all the incoming values. This can materialize more phis.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+ PHIsToRewrite);
+ FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
+ }
+ }
+
+ // Drop all inter-phi links and any loads that made it this far.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->dropAllReferences();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->dropAllReferences();
+ }
+
+ // Delete all the phis and loads now that inter-references are dead.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->eraseFromParent();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->eraseFromParent();
+ }
+
+ // The old global is now dead, remove it.
+ GV->eraseFromParent();
+
+ ++NumHeapSRA;
+ return cast<GlobalVariable>(FieldGlobals[0]);
+}
+
+/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
+/// pointer global variable with a single value stored it that is a malloc or
+/// cast of malloc.
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
+ CallInst *CI,
+ const Type *AllocTy,
+ Module::global_iterator &GVI,
+ TargetData *TD) {
+ if (!TD)
+ return false;
+
+ // If this is a malloc of an abstract type, don't touch it.
+ if (!AllocTy->isSized())
+ return false;
+
+ // We can't optimize this global unless all uses of it are *known* to be
+ // of the malloc value, not of the null initializer value (consider a use
+ // that compares the global's value against zero to see if the malloc has
+ // been reached). To do this, we check to see if all uses of the global
+ // would trap if the global were null: this proves that they must all
+ // happen after the malloc.
+ if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+ return false;
+
+ // We can't optimize this if the malloc itself is used in a complex way,
+ // for example, being stored into multiple globals. This allows the
+ // malloc to be stored into the specified global, loaded setcc'd, and
+ // GEP'd. These are all things we could transform to using the global
+ // for.
+ SmallPtrSet<const PHINode*, 8> PHIs;
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
+ return false;
+
+ // If we have a global that is only initialized with a fixed size malloc,
+ // transform the program to use global memory instead of malloc'd memory.
+ // This eliminates dynamic allocation, avoids an indirection accessing the
+ // data, and exposes the resultant global to further GlobalOpt.
+ // We cannot optimize the malloc if we cannot determine malloc array size.
+ Value *NElems = getMallocArraySize(CI, TD, true);
+ if (!NElems)
+ return false;
+
+ if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
+ // Restrict this transformation to only working on small allocations
+ // (2048 bytes currently), as we don't want to introduce a 16M global or
+ // something.
+ if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
+ return true;
+ }
+
+ // If the allocation is an array of structures, consider transforming this
+ // into multiple malloc'd arrays, one for each field. This is basically
+ // SRoA for malloc'd memory.
+
+ // If this is an allocation of a fixed size array of structs, analyze as a
+ // variable size array. malloc [100 x struct],1 -> malloc struct, 100
+ if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
+ if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+ AllocTy = AT->getElementType();
+
+ const StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
+ if (!AllocSTy)
+ return false;
+
+ // This the structure has an unreasonable number of fields, leave it
+ // alone.
+ if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+ AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
+
+ // If this is a fixed size array, transform the Malloc to be an alloc of
+ // structs. malloc [100 x struct],1 -> malloc struct, 100
+ if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
+ Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
+ Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
+ Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
+ AllocSize, NumElements,
+ 0, CI->getName());
+ Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
+ CI->replaceAllUsesWith(Cast);
+ CI->eraseFromParent();
+ CI = dyn_cast<BitCastInst>(Malloc) ?
+ extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
+ }
+
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
+ return true;
+ }
+
+ return false;
+}
+
+// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
+// that only one value (besides its initializer) is ever stored to the global.
+static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+ Module::global_iterator &GVI,
+ TargetData *TD) {
+ // Ignore no-op GEPs and bitcasts.
+ StoredOnceVal = StoredOnceVal->stripPointerCasts();
+
+ // If we are dealing with a pointer global that is initialized to null and
+ // only has one (non-null) value stored into it, then we can optimize any
+ // users of the loaded value (often calls and loads) that would trap if the
+ // value was null.
+ if (GV->getInitializer()->getType()->isPointerTy() &&
+ GV->getInitializer()->isNullValue()) {
+ if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
+ if (GV->getInitializer()->getType() != SOVC->getType())
+ SOVC =
+ ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
+
+ // Optimize away any trapping uses of the loaded value.
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
+ return true;
+ } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
+ const Type* MallocType = getMallocAllocatedType(CI);
+ if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
+ GVI, TD))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
+/// two values ever stored into GV are its initializer and OtherVal. See if we
+/// can shrink the global into a boolean and select between the two values
+/// whenever it is used. This exposes the values to other scalar optimizations.
+static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
+ const Type *GVElType = GV->getType()->getElementType();
+
+ // If GVElType is already i1, it is already shrunk. If the type of the GV is
+ // an FP value, pointer or vector, don't do this optimization because a select
+ // between them is very expensive and unlikely to lead to later
+ // simplification. In these cases, we typically end up with "cond ? v1 : v2"
+ // where v1 and v2 both require constant pool loads, a big loss.
+ if (GVElType == Type::getInt1Ty(GV->getContext()) ||
+ GVElType->isFloatingPointTy() ||
+ GVElType->isPointerTy() || GVElType->isVectorTy())
+ return false;
+
+ // Walk the use list of the global seeing if all the uses are load or store.
+ // If there is anything else, bail out.
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
+ User *U = *I;
+ if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
+ return false;
+ }
+
+ DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV);
+
+ // Create the new global, initializing it to false.
+ GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
+ false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(GV->getContext()),
+ GV->getName()+".b",
+ GV->isThreadLocal());
+ GV->getParent()->getGlobalList().insert(GV, NewGV);
+
+ Constant *InitVal = GV->getInitializer();
+ assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
+ "No reason to shrink to bool!");
+
+ // If initialized to zero and storing one into the global, we can use a cast
+ // instead of a select to synthesize the desired value.
+ bool IsOneZero = false;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
+ IsOneZero = InitVal->isNullValue() && CI->isOne();
+
+ while (!GV->use_empty()) {
+ Instruction *UI = cast<Instruction>(GV->use_back());
+ if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
+ // Change the store into a boolean store.
+ bool StoringOther = SI->getOperand(0) == OtherVal;
+ // Only do this if we weren't storing a loaded value.
+ Value *StoreVal;
+ if (StoringOther || SI->getOperand(0) == InitVal)
+ StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
+ StoringOther);
+ else {
+ // Otherwise, we are storing a previously loaded copy. To do this,
+ // change the copy from copying the original value to just copying the
+ // bool.
+ Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
+
+ // If we've already replaced the input, StoredVal will be a cast or
+ // select instruction. If not, it will be a load of the original
+ // global.
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+ assert(LI->getOperand(0) == GV && "Not a copy!");
+ // Insert a new load, to preserve the saved value.
+ StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI);
+ } else {
+ assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
+ "This is not a form that we understand!");
+ StoreVal = StoredVal->getOperand(0);
+ assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
+ }
+ }
+ new StoreInst(StoreVal, NewGV, SI);
+ } else {
+ // Change the load into a load of bool then a select.
+ LoadInst *LI = cast<LoadInst>(UI);
+ LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI);
+ Value *NSI;
+ if (IsOneZero)
+ NSI = new ZExtInst(NLI, LI->getType(), "", LI);
+ else
+ NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
+ NSI->takeName(LI);
+ LI->replaceAllUsesWith(NSI);
+ }
+ UI->eraseFromParent();
+ }
+
+ GV->eraseFromParent();
+ return true;
+}
+
+
+/// ProcessInternalGlobal - Analyze the specified global variable and optimize
+/// it if possible. If we make a change, return true.
+bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
+ Module::global_iterator &GVI) {
+ SmallPtrSet<const PHINode*, 16> PHIUsers;
+ GlobalStatus GS;
+ GV->removeDeadConstantUsers();
+
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
+ GV->eraseFromParent();
+ ++NumDeleted;
+ return true;
+ }
+
+ if (!AnalyzeGlobal(GV, GS, PHIUsers)) {
+#if 0
+ DEBUG(dbgs() << "Global: " << *GV);
+ DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n");
+ DEBUG(dbgs() << " StoredType = ");
+ switch (GS.StoredType) {
+ case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break;
+ case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n");
+ break;
+ case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break;
+ case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break;
+ }
+ if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue)
+ DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n");
+ if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions)
+ DEBUG(dbgs() << " AccessingFunction = "
+ << GS.AccessingFunction->getName() << "\n");
+ DEBUG(dbgs() << " HasMultipleAccessingFunctions = "
+ << GS.HasMultipleAccessingFunctions << "\n");
+ DEBUG(dbgs() << " HasNonInstructionUser = "
+ << GS.HasNonInstructionUser<<"\n");
+ DEBUG(dbgs() << "\n");
+#endif
+
+ // If this is a first class global and has only one accessing function
+ // and this function is main (which we know is not recursive we can make
+ // this global a local variable) we replace the global with a local alloca
+ // in this function.
+ //
+ // NOTE: It doesn't make sense to promote non single-value types since we
+ // are just replacing static memory to stack memory.
+ //
+ // If the global is in different address space, don't bring it to stack.
+ if (!GS.HasMultipleAccessingFunctions &&
+ GS.AccessingFunction && !GS.HasNonInstructionUser &&
+ GV->getType()->getElementType()->isSingleValueType() &&
+ GS.AccessingFunction->getName() == "main" &&
+ GS.AccessingFunction->hasExternalLinkage() &&
+ GV->getType()->getAddressSpace() == 0) {
+ DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
+ Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction
+ ->getEntryBlock().begin());
+ const Type* ElemTy = GV->getType()->getElementType();
+ // FIXME: Pass Global's alignment when globals have alignment
+ AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
+ if (!isa<UndefValue>(GV->getInitializer()))
+ new StoreInst(GV->getInitializer(), Alloca, &FirstI);
+
+ GV->replaceAllUsesWith(Alloca);
+ GV->eraseFromParent();
+ ++NumLocalized;
+ return true;
+ }
+
+ // If the global is never loaded (but may be stored to), it is dead.
+ // Delete it now.
+ if (!GS.isLoaded) {
+ DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
+
+ // Delete any stores we can find to the global. We may not be able to
+ // make it completely dead though.
+ bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ // If the global is dead now, delete it.
+ if (GV->use_empty()) {
+ GV->eraseFromParent();
+ ++NumDeleted;
+ Changed = true;
+ }
+ return Changed;
+
+ } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
+ DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
+ GV->setConstant(true);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ // If the global is dead now, just nuke it.
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
+ << "all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+
+ ++NumMarked;
+ return true;
+ } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
+ if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
+ GVI = FirstNewGV; // Don't skip the newly produced globals!
+ return true;
+ }
+ } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
+ // If the initial value for the global was an undef value, and if only
+ // one other value was stored into it, we can just change the
+ // initializer to be the stored value, then delete all stores to the
+ // global. This allows us to mark it constant.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (isa<UndefValue>(GV->getInitializer())) {
+ // Change the initial value here.
+ GV->setInitializer(SOVConstant);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** Substituting initializer allowed us to "
+ << "simplify all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
+ } else {
+ GVI = GV;
+ }
+ ++NumSubstitute;
+ return true;
+ }
+
+ // Try to optimize globals based on the knowledge that only one value
+ // (besides its initializer) is ever stored to the global.
+ if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
+ getAnalysisIfAvailable<TargetData>()))
+ return true;
+
+ // Otherwise, if the global was not a boolean, we can shrink it to be a
+ // boolean.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ ++NumShrunkToBool;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
+/// function, changing them to FastCC.
+static void ChangeCalleesToFastCall(Function *F) {
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
+ CallSite User(cast<Instruction>(*UI));
+ User.setCallingConv(CallingConv::Fast);
+ }
+}
+
+static AttrListPtr StripNest(const AttrListPtr &Attrs) {
+ for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
+ if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0)
+ continue;
+
+ // There can be only one.
+ return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest);
+ }
+
+ return Attrs;
+}
+
+static void RemoveNestAttribute(Function *F) {
+ F->setAttributes(StripNest(F->getAttributes()));
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
+ CallSite User(cast<Instruction>(*UI));
+ User.setAttributes(StripNest(User.getAttributes()));
+ }
+}
+
+bool GlobalOpt::OptimizeFunctions(Module &M) {
+ bool Changed = false;
+ // Optimize functions.
+ for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
+ Function *F = FI++;
+ // Functions without names cannot be referenced outside this module.
+ if (!F->hasName() && !F->isDeclaration())
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->removeDeadConstantUsers();
+ if (F->use_empty() && (F->hasLocalLinkage() || F->hasLinkOnceLinkage())) {
+ F->eraseFromParent();
+ Changed = true;
+ ++NumFnDeleted;
+ } else if (F->hasLocalLinkage()) {
+ if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
+ !F->hasAddressTaken()) {
+ // If this function has C calling conventions, is not a varargs
+ // function, and is only called directly, promote it to use the Fast
+ // calling convention.
+ F->setCallingConv(CallingConv::Fast);
+ ChangeCalleesToFastCall(F);
+ ++NumFastCallFns;
+ Changed = true;
+ }
+
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
+ !F->hasAddressTaken()) {
+ // The function is not used by a trampoline intrinsic, so it is safe
+ // to remove the 'nest' attribute.
+ RemoveNestAttribute(F);
+ ++NumNestRemoved;
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+bool GlobalOpt::OptimizeGlobalVars(Module &M) {
+ bool Changed = false;
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+ // Global variables without names cannot be referenced outside this module.
+ if (!GV->hasName() && !GV->isDeclaration())
+ GV->setLinkage(GlobalValue::InternalLinkage);
+ // Simplify the initializer.
+ if (GV->hasInitializer())
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ Constant *New = ConstantFoldConstantExpression(CE, TD);
+ if (New && New != CE)
+ GV->setInitializer(New);
+ }
+ // Do more involved optimizations if the global is internal.
+ if (!GV->isConstant() && GV->hasLocalLinkage() &&
+ GV->hasInitializer())
+ Changed |= ProcessInternalGlobal(GV, GVI);
+ }
+ return Changed;
+}
+
+/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all
+/// initializers have an init priority of 65535.
+GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (I->getName() == "llvm.global_ctors") {
+ // Found it, verify it's an array of { int, void()* }.
+ const ArrayType *ATy =dyn_cast<ArrayType>(I->getType()->getElementType());
+ if (!ATy) return 0;
+ const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
+ if (!STy || STy->getNumElements() != 2 ||
+ !STy->getElementType(0)->isIntegerTy(32)) return 0;
+ const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
+ if (!PFTy) return 0;
+ const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
+ if (!FTy || !FTy->getReturnType()->isVoidTy() ||
+ FTy->isVarArg() || FTy->getNumParams() != 0)
+ return 0;
+
+ // Verify that the initializer is simple enough for us to handle.
+ if (!I->hasDefinitiveInitializer()) return 0;
+ ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer());
+ if (!CA) return 0;
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) {
+ if (isa<ConstantPointerNull>(CS->getOperand(1)))
+ continue;
+
+ // Must have a function or null ptr.
+ if (!isa<Function>(CS->getOperand(1)))
+ return 0;
+
+ // Init priority must be standard.
+ ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
+ if (!CI || CI->getZExtValue() != 65535)
+ return 0;
+ } else {
+ return 0;
+ }
+
+ return I;
+ }
+ return 0;
+}
+
+/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
+/// return a list of the functions and null terminator as a vector.
+static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+ std::vector<Function*> Result;
+ Result.reserve(CA->getNumOperands());
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ ConstantStruct *CS = cast<ConstantStruct>(*i);
+ Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
+ }
+ return Result;
+}
+
+/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
+/// specified array, returning the new global to use.
+static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
+ const std::vector<Function*> &Ctors) {
+ // If we made a change, reassemble the initializer list.
+ std::vector<Constant*> CSVals;
+ CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535));
+ CSVals.push_back(0);
+
+ // Create the new init list.
+ std::vector<Constant*> CAList;
+ for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
+ if (Ctors[i]) {
+ CSVals[1] = Ctors[i];
+ } else {
+ const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
+ false);
+ const PointerType *PFTy = PointerType::getUnqual(FTy);
+ CSVals[1] = Constant::getNullValue(PFTy);
+ CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
+ 2147483647);
+ }
+ CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
+ }
+
+ // Create the array initializer.
+ const Type *StructTy =
+ cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
+ Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
+ CAList.size()), CAList);
+
+ // If we didn't change the number of elements, don't create a new GV.
+ if (CA->getType() == GCL->getInitializer()->getType()) {
+ GCL->setInitializer(CA);
+ return GCL;
+ }
+
+ // Create the new global and insert it next to the existing list.
+ GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
+ GCL->getLinkage(), CA, "",
+ GCL->isThreadLocal());
+ GCL->getParent()->getGlobalList().insert(GCL, NGV);
+ NGV->takeName(GCL);
+
+ // Nuke the old list, replacing any uses with the new one.
+ if (!GCL->use_empty()) {
+ Constant *V = NGV;
+ if (V->getType() != GCL->getType())
+ V = ConstantExpr::getBitCast(V, GCL->getType());
+ GCL->replaceAllUsesWith(V);
+ }
+ GCL->eraseFromParent();
+
+ if (Ctors.size())
+ return NGV;
+ else
+ return 0;
+}
+
+
+static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues,
+ Value *V) {
+ if (Constant *CV = dyn_cast<Constant>(V)) return CV;
+ Constant *R = ComputedValues[V];
+ assert(R && "Reference to an uncomputed value!");
+ return R;
+}
+
+/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
+/// enough for us to understand. In particular, if it is a cast of something,
+/// we punt. We basically just support direct accesses to globals and GEP's of
+/// globals. This should be kept up to date with CommitValueTo.
+static bool isSimpleEnoughPointerToCommit(Constant *C) {
+ // Conservatively, avoid aggregate types. This is because we don't
+ // want to worry about them partially overlapping other stores.
+ if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
+ return false;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ // Do not allow weak/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ return GV->hasDefinitiveInitializer();
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ // Handle a constantexpr gep.
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0)) &&
+ cast<GEPOperator>(CE)->isInBounds()) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ // Do not allow weak/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ if (!GV->hasDefinitiveInitializer())
+ return false;
+
+ // The first index must be zero.
+ ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
+ if (!CI || !CI->isZero()) return false;
+
+ // The remaining indices must be compile-time known integers within the
+ // notional bounds of the corresponding static array types.
+ if (!CE->isGEPWithNoNotionalOverIndexing())
+ return false;
+
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+ }
+ return false;
+}
+
+/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
+/// initializer. This returns 'Init' modified to reflect 'Val' stored into it.
+/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
+static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
+ ConstantExpr *Addr, unsigned OpNo) {
+ // Base case of the recursion.
+ if (OpNo == Addr->getNumOperands()) {
+ assert(Val->getType() == Init->getType() && "Type mismatch!");
+ return Val;
+ }
+
+ std::vector<Constant*> Elts;
+ if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
+
+ // Break up the constant into its elements.
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
+ for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Elts.push_back(Constant::getNullValue(STy->getElementType(i)));
+ } else if (isa<UndefValue>(Init)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Elts.push_back(UndefValue::get(STy->getElementType(i)));
+ } else {
+ llvm_unreachable("This code is out of sync with "
+ " ConstantFoldLoadThroughGEPConstantExpr");
+ }
+
+ // Replace the element that we are supposed to.
+ ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
+ unsigned Idx = CU->getZExtValue();
+ assert(Idx < STy->getNumElements() && "Struct index out of range!");
+ Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
+
+ // Return the modified struct.
+ return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(),
+ STy->isPacked());
+ } else {
+ ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
+ const SequentialType *InitTy = cast<SequentialType>(Init->getType());
+
+ uint64_t NumElts;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
+ NumElts = ATy->getNumElements();
+ else
+ NumElts = cast<VectorType>(InitTy)->getNumElements();
+
+
+ // Break up the array into elements.
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
+ } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) {
+ for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType()));
+ } else {
+ assert(isa<UndefValue>(Init) && "This code is out of sync with "
+ " ConstantFoldLoadThroughGEPConstantExpr");
+ Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
+ }
+
+ assert(CI->getZExtValue() < NumElts);
+ Elts[CI->getZExtValue()] =
+ EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
+
+ if (Init->getType()->isArrayTy())
+ return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
+ else
+ return ConstantVector::get(&Elts[0], Elts.size());
+ }
+}
+
+/// CommitValueTo - We have decided that Addr (which satisfies the predicate
+/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
+static void CommitValueTo(Constant *Val, Constant *Addr) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+ assert(GV->hasInitializer());
+ GV->setInitializer(Val);
+ return;
+ }
+
+ ConstantExpr *CE = cast<ConstantExpr>(Addr);
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
+}
+
+/// ComputeLoadResult - Return the value that would be computed by a load from
+/// P after the stores reflected by 'memory' have been performed. If we can't
+/// decide, return null.
+static Constant *ComputeLoadResult(Constant *P,
+ const DenseMap<Constant*, Constant*> &Memory) {
+ // If this memory location has been recently stored, use the stored value: it
+ // is the most up-to-date.
+ DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
+ if (I != Memory.end()) return I->second;
+
+ // Access it.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+ if (GV->hasDefinitiveInitializer())
+ return GV->getInitializer();
+ return 0;
+ }
+
+ // Handle a constantexpr getelementptr.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ if (GV->hasDefinitiveInitializer())
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+ }
+
+ return 0; // don't know how to evaluate.
+}
+
+/// EvaluateFunction - Evaluate a call to function F, returning true if
+/// successful, false if we can't evaluate it. ActualArgs contains the formal
+/// arguments for the function.
+static bool EvaluateFunction(Function *F, Constant *&RetVal,
+ const SmallVectorImpl<Constant*> &ActualArgs,
+ std::vector<Function*> &CallStack,
+ DenseMap<Constant*, Constant*> &MutatedMemory,
+ std::vector<GlobalVariable*> &AllocaTmps) {
+ // Check to see if this function is already executing (recursion). If so,
+ // bail out. TODO: we might want to accept limited recursion.
+ if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
+ return false;
+
+ CallStack.push_back(F);
+
+ /// Values - As we compute SSA register values, we store their contents here.
+ DenseMap<Value*, Constant*> Values;
+
+ // Initialize arguments to the incoming values specified.
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
+ ++AI, ++ArgNo)
+ Values[AI] = ActualArgs[ArgNo];
+
+ /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
+ /// we can only evaluate any one basic block at most once. This set keeps
+ /// track of what we have executed so we can detect recursive cases etc.
+ SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
+
+ // CurInst - The current instruction we're evaluating.
+ BasicBlock::iterator CurInst = F->begin()->begin();
+
+ // This is the main evaluation loop.
+ while (1) {
+ Constant *InstResult = 0;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
+ if (SI->isVolatile()) return false; // no volatile accesses.
+ Constant *Ptr = getVal(Values, SI->getOperand(1));
+ if (!isSimpleEnoughPointerToCommit(Ptr))
+ // If this is too complex for us to commit, reject it.
+ return false;
+ Constant *Val = getVal(Values, SI->getOperand(0));
+ MutatedMemory[Ptr] = Val;
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
+ InstResult = ConstantExpr::get(BO->getOpcode(),
+ getVal(Values, BO->getOperand(0)),
+ getVal(Values, BO->getOperand(1)));
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
+ InstResult = ConstantExpr::getCompare(CI->getPredicate(),
+ getVal(Values, CI->getOperand(0)),
+ getVal(Values, CI->getOperand(1)));
+ } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
+ InstResult = ConstantExpr::getCast(CI->getOpcode(),
+ getVal(Values, CI->getOperand(0)),
+ CI->getType());
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
+ InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
+ getVal(Values, SI->getOperand(1)),
+ getVal(Values, SI->getOperand(2)));
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
+ Constant *P = getVal(Values, GEP->getOperand(0));
+ SmallVector<Constant*, 8> GEPOps;
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
+ i != e; ++i)
+ GEPOps.push_back(getVal(Values, *i));
+ InstResult = cast<GEPOperator>(GEP)->isInBounds() ?
+ ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) :
+ ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
+ if (LI->isVolatile()) return false; // no volatile accesses.
+ InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
+ MutatedMemory);
+ if (InstResult == 0) return false; // Could not evaluate load.
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
+ if (AI->isArrayAllocation()) return false; // Cannot handle array allocs.
+ const Type *Ty = AI->getType()->getElementType();
+ AllocaTmps.push_back(new GlobalVariable(Ty, false,
+ GlobalValue::InternalLinkage,
+ UndefValue::get(Ty),
+ AI->getName()));
+ InstResult = AllocaTmps.back();
+ } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) {
+
+ // Debug info can safely be ignored here.
+ if (isa<DbgInfoIntrinsic>(CI)) {
+ ++CurInst;
+ continue;
+ }
+
+ // Cannot handle inline asm.
+ if (isa<InlineAsm>(CI->getCalledValue())) return false;
+
+ // Resolve function pointers.
+ Function *Callee = dyn_cast<Function>(getVal(Values,
+ CI->getCalledValue()));
+ if (!Callee) return false; // Cannot resolve.
+
+ SmallVector<Constant*, 8> Formals;
+ CallSite CS(CI);
+ for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i)
+ Formals.push_back(getVal(Values, *i));
+
+ if (Callee->isDeclaration()) {
+ // If this is a function we can constant fold, do it.
+ if (Constant *C = ConstantFoldCall(Callee, Formals.data(),
+ Formals.size())) {
+ InstResult = C;
+ } else {
+ return false;
+ }
+ } else {
+ if (Callee->getFunctionType()->isVarArg())
+ return false;
+
+ Constant *RetVal;
+ // Execute the call, if successful, use the return value.
+ if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
+ MutatedMemory, AllocaTmps))
+ return false;
+ InstResult = RetVal;
+ }
+ } else if (isa<TerminatorInst>(CurInst)) {
+ BasicBlock *NewBB = 0;
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
+ if (BI->isUnconditional()) {
+ NewBB = BI->getSuccessor(0);
+ } else {
+ ConstantInt *Cond =
+ dyn_cast<ConstantInt>(getVal(Values, BI->getCondition()));
+ if (!Cond) return false; // Cannot determine.
+
+ NewBB = BI->getSuccessor(!Cond->getZExtValue());
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
+ ConstantInt *Val =
+ dyn_cast<ConstantInt>(getVal(Values, SI->getCondition()));
+ if (!Val) return false; // Cannot determine.
+ NewBB = SI->getSuccessor(SI->findCaseValue(Val));
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
+ Value *Val = getVal(Values, IBI->getAddress())->stripPointerCasts();
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
+ NewBB = BA->getBasicBlock();
+ else
+ return false; // Cannot determine.
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) {
+ if (RI->getNumOperands())
+ RetVal = getVal(Values, RI->getOperand(0));
+
+ CallStack.pop_back(); // return from fn.
+ return true; // We succeeded at evaluating this ctor!
+ } else {
+ // invoke, unwind, unreachable.
+ return false; // Cannot handle this terminator.
+ }
+
+ // Okay, we succeeded in evaluating this control flow. See if we have
+ // executed the new block before. If so, we have a looping function,
+ // which we cannot evaluate in reasonable time.
+ if (!ExecutedBlocks.insert(NewBB))
+ return false; // looped!
+
+ // Okay, we have never been in this block before. Check to see if there
+ // are any PHI nodes. If so, evaluate them with information about where
+ // we came from.
+ BasicBlock *OldBB = CurInst->getParent();
+ CurInst = NewBB->begin();
+ PHINode *PN;
+ for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+ Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB));
+
+ // Do NOT increment CurInst. We know that the terminator had no value.
+ continue;
+ } else {
+ // Did not know how to evaluate this!
+ return false;
+ }
+
+ if (!CurInst->use_empty())
+ Values[CurInst] = InstResult;
+
+ // Advance program counter.
+ ++CurInst;
+ }
+}
+
+/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
+/// we can. Return true if we can, false otherwise.
+static bool EvaluateStaticConstructor(Function *F) {
+ /// MutatedMemory - For each store we execute, we update this map. Loads
+ /// check this to get the most up-to-date value. If evaluation is successful,
+ /// this state is committed to the process.
+ DenseMap<Constant*, Constant*> MutatedMemory;
+
+ /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
+ /// to represent its body. This vector is needed so we can delete the
+ /// temporary globals when we are done.
+ std::vector<GlobalVariable*> AllocaTmps;
+
+ /// CallStack - This is used to detect recursion. In pathological situations
+ /// we could hit exponential behavior, but at least there is nothing
+ /// unbounded.
+ std::vector<Function*> CallStack;
+
+ // Call the function.
+ Constant *RetValDummy;
+ bool EvalSuccess = EvaluateFunction(F, RetValDummy,
+ SmallVector<Constant*, 0>(), CallStack,
+ MutatedMemory, AllocaTmps);
+ if (EvalSuccess) {
+ // We succeeded at evaluation: commit the result.
+ DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
+ << F->getName() << "' to " << MutatedMemory.size()
+ << " stores.\n");
+ for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
+ E = MutatedMemory.end(); I != E; ++I)
+ CommitValueTo(I->second, I->first);
+ }
+
+ // At this point, we are done interpreting. If we created any 'alloca'
+ // temporaries, release them now.
+ while (!AllocaTmps.empty()) {
+ GlobalVariable *Tmp = AllocaTmps.back();
+ AllocaTmps.pop_back();
+
+ // If there are still users of the alloca, the program is doing something
+ // silly, e.g. storing the address of the alloca somewhere and using it
+ // later. Since this is undefined, we'll just make it be null.
+ if (!Tmp->use_empty())
+ Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
+ delete Tmp;
+ }
+
+ return EvalSuccess;
+}
+
+
+
+/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible.
+/// Return true if anything changed.
+bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
+ std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
+ bool MadeChange = false;
+ if (Ctors.empty()) return false;
+
+ // Loop over global ctors, optimizing them when we can.
+ for (unsigned i = 0; i != Ctors.size(); ++i) {
+ Function *F = Ctors[i];
+ // Found a null terminator in the middle of the list, prune off the rest of
+ // the list.
+ if (F == 0) {
+ if (i != Ctors.size()-1) {
+ Ctors.resize(i+1);
+ MadeChange = true;
+ }
+ break;
+ }
+
+ // We cannot simplify external ctor functions.
+ if (F->empty()) continue;
+
+ // If we can evaluate the ctor at compile time, do.
+ if (EvaluateStaticConstructor(F)) {
+ Ctors.erase(Ctors.begin()+i);
+ MadeChange = true;
+ --i;
+ ++NumCtorsEvaluated;
+ continue;
+ }
+ }
+
+ if (!MadeChange) return false;
+
+ GCL = InstallGlobalCtors(GCL, Ctors);
+ return true;
+}
+
+bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
+ bool Changed = false;
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E;) {
+ Module::alias_iterator J = I++;
+ // Aliases without names cannot be referenced outside this module.
+ if (!J->hasName() && !J->isDeclaration())
+ J->setLinkage(GlobalValue::InternalLinkage);
+ // If the aliasee may change at link time, nothing can be done - bail out.
+ if (J->mayBeOverridden())
+ continue;
+
+ Constant *Aliasee = J->getAliasee();
+ GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
+ Target->removeDeadConstantUsers();
+ bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();
+
+ // Make all users of the alias use the aliasee instead.
+ if (!J->use_empty()) {
+ J->replaceAllUsesWith(Aliasee);
+ ++NumAliasesResolved;
+ Changed = true;
+ }
+
+ // If the alias is externally visible, we may still be able to simplify it.
+ if (!J->hasLocalLinkage()) {
+ // If the aliasee has internal linkage, give it the name and linkage
+ // of the alias, and delete the alias. This turns:
+ // define internal ... @f(...)
+ // @a = alias ... @f
+ // into:
+ // define ... @a(...)
+ if (!Target->hasLocalLinkage())
+ continue;
+
+ // Do not perform the transform if multiple aliases potentially target the
+ // aliasee. This check also ensures that it is safe to replace the section
+ // and other attributes of the aliasee with those of the alias.
+ if (!hasOneUse)
+ continue;
+
+ // Give the aliasee the name, linkage and other attributes of the alias.
+ Target->takeName(J);
+ Target->setLinkage(J->getLinkage());
+ Target->GlobalValue::copyAttributesFrom(J);
+ }
+
+ // Delete the alias.
+ M.getAliasList().erase(J);
+ ++NumAliasesRemoved;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool GlobalOpt::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Try to find the llvm.globalctors list.
+ GlobalVariable *GlobalCtors = FindGlobalCtors(M);
+
+ bool LocalChange = true;
+ while (LocalChange) {
+ LocalChange = false;
+
+ // Delete functions that are trivially dead, ccc -> fastcc
+ LocalChange |= OptimizeFunctions(M);
+
+ // Optimize global_ctors list.
+ if (GlobalCtors)
+ LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
+
+ // Optimize non-address-taken globals.
+ LocalChange |= OptimizeGlobalVars(M);
+
+ // Resolve aliases, when possible.
+ LocalChange |= OptimizeGlobalAliases(M);
+ Changed |= LocalChange;
+ }
+
+ // TODO: Move all global ctors functions to the end of the module for code
+ // layout.
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
new file mode 100644
index 0000000..1b3cf78
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -0,0 +1,277 @@
+//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements an _extremely_ simple interprocedural constant
+// propagation pass. It could certainly be improved in many different ways,
+// like using a worklist. This pass makes arguments dead, but does not remove
+// them. The existing dead argument elimination pass should be run after this
+// to clean up the mess.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ipconstprop"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+STATISTIC(NumArgumentsProped, "Number of args turned into constants");
+STATISTIC(NumReturnValProped, "Number of return values turned into constants");
+
+namespace {
+ /// IPCP - The interprocedural constant propagation pass
+ ///
+ struct IPCP : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ IPCP() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M);
+ private:
+ bool PropagateConstantsIntoArguments(Function &F);
+ bool PropagateConstantReturn(Function &F);
+ };
+}
+
+char IPCP::ID = 0;
+INITIALIZE_PASS(IPCP, "ipconstprop",
+ "Interprocedural constant propagation", false, false);
+
+ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
+
+bool IPCP::runOnModule(Module &M) {
+ bool Changed = false;
+ bool LocalChange = true;
+
+ // FIXME: instead of using smart algorithms, we just iterate until we stop
+ // making changes.
+ while (LocalChange) {
+ LocalChange = false;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Delete any klingons.
+ I->removeDeadConstantUsers();
+ if (I->hasLocalLinkage())
+ LocalChange |= PropagateConstantsIntoArguments(*I);
+ Changed |= PropagateConstantReturn(*I);
+ }
+ Changed |= LocalChange;
+ }
+ return Changed;
+}
+
+/// PropagateConstantsIntoArguments - Look at all uses of the specified
+/// function. If all uses are direct call sites, and all pass a particular
+/// constant in for an argument, propagate that constant in as the argument.
+///
+bool IPCP::PropagateConstantsIntoArguments(Function &F) {
+ if (F.arg_empty() || F.use_empty()) return false; // No arguments? Early exit.
+
+ // For each argument, keep track of its constant value and whether it is a
+ // constant or not. The bool is driven to true when found to be non-constant.
+ SmallVector<std::pair<Constant*, bool>, 16> ArgumentConstants;
+ ArgumentConstants.resize(F.arg_size());
+
+ unsigned NumNonconstant = 0;
+ for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+ User *U = *UI;
+ // Ignore blockaddress uses.
+ if (isa<BlockAddress>(U)) continue;
+
+ // Used by a non-instruction, or not the callee of a function, do not
+ // transform.
+ if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ return false;
+
+ CallSite CS(cast<Instruction>(U));
+ if (!CS.isCallee(UI))
+ return false;
+
+ // Check out all of the potentially constant arguments. Note that we don't
+ // inspect varargs here.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ Function::arg_iterator Arg = F.arg_begin();
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
+ ++i, ++AI, ++Arg) {
+
+ // If this argument is known non-constant, ignore it.
+ if (ArgumentConstants[i].second)
+ continue;
+
+ Constant *C = dyn_cast<Constant>(*AI);
+ if (C && ArgumentConstants[i].first == 0) {
+ ArgumentConstants[i].first = C; // First constant seen.
+ } else if (C && ArgumentConstants[i].first == C) {
+ // Still the constant value we think it is.
+ } else if (*AI == &*Arg) {
+ // Ignore recursive calls passing argument down.
+ } else {
+ // Argument became non-constant. If all arguments are non-constant now,
+ // give up on this function.
+ if (++NumNonconstant == ArgumentConstants.size())
+ return false;
+ ArgumentConstants[i].second = true;
+ }
+ }
+ }
+
+ // If we got to this point, there is a constant argument!
+ assert(NumNonconstant != ArgumentConstants.size());
+ bool MadeChange = false;
+ Function::arg_iterator AI = F.arg_begin();
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) {
+ // Do we have a constant argument?
+ if (ArgumentConstants[i].second || AI->use_empty() ||
+ (AI->hasByValAttr() && !F.onlyReadsMemory()))
+ continue;
+
+ Value *V = ArgumentConstants[i].first;
+ if (V == 0) V = UndefValue::get(AI->getType());
+ AI->replaceAllUsesWith(V);
+ ++NumArgumentsProped;
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+
+// Check to see if this function returns one or more constants. If so, replace
+// all callers that use those return values with the constant value. This will
+// leave in the actual return values and instructions, but deadargelim will
+// clean that up.
+//
+// Additionally if a function always returns one of its arguments directly,
+// callers will be updated to use the value they pass in directly instead of
+// using the return value.
+bool IPCP::PropagateConstantReturn(Function &F) {
+ if (F.getReturnType()->isVoidTy())
+ return false; // No return value.
+
+ // If this function could be overridden later in the link stage, we can't
+ // propagate information about its results into callers.
+ if (F.mayBeOverridden())
+ return false;
+
+ // Check to see if this function returns a constant.
+ SmallVector<Value *,4> RetVals;
+ const StructType *STy = dyn_cast<StructType>(F.getReturnType());
+ if (STy)
+ for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
+ RetVals.push_back(UndefValue::get(STy->getElementType(i)));
+ else
+ RetVals.push_back(UndefValue::get(F.getReturnType()));
+
+ unsigned NumNonConstant = 0;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ for (unsigned i = 0, e = RetVals.size(); i != e; ++i) {
+ // Already found conflicting return values?
+ Value *RV = RetVals[i];
+ if (!RV)
+ continue;
+
+ // Find the returned value
+ Value *V;
+ if (!STy)
+ V = RI->getOperand(i);
+ else
+ V = FindInsertedValue(RI->getOperand(0), i);
+
+ if (V) {
+ // Ignore undefs, we can change them into anything
+ if (isa<UndefValue>(V))
+ continue;
+
+ // Try to see if all the rets return the same constant or argument.
+ if (isa<Constant>(V) || isa<Argument>(V)) {
+ if (isa<UndefValue>(RV)) {
+ // No value found yet? Try the current one.
+ RetVals[i] = V;
+ continue;
+ }
+ // Returning the same value? Good.
+ if (RV == V)
+ continue;
+ }
+ }
+ // Different or no known return value? Don't propagate this return
+ // value.
+ RetVals[i] = 0;
+ // All values non constant? Stop looking.
+ if (++NumNonConstant == RetVals.size())
+ return false;
+ }
+ }
+
+ // If we got here, the function returns at least one constant value. Loop
+ // over all users, replacing any uses of the return value with the returned
+ // constant.
+ bool MadeChange = false;
+ for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+ CallSite CS(*UI);
+ Instruction* Call = CS.getInstruction();
+
+ // Not a call instruction or a call instruction that's not calling F
+ // directly?
+ if (!Call || !CS.isCallee(UI))
+ continue;
+
+ // Call result not used?
+ if (Call->use_empty())
+ continue;
+
+ MadeChange = true;
+
+ if (STy == 0) {
+ Value* New = RetVals[0];
+ if (Argument *A = dyn_cast<Argument>(New))
+ // Was an argument returned? Then find the corresponding argument in
+ // the call instruction and use that.
+ New = CS.getArgument(A->getArgNo());
+ Call->replaceAllUsesWith(New);
+ continue;
+ }
+
+ for (Value::use_iterator I = Call->use_begin(), E = Call->use_end();
+ I != E;) {
+ Instruction *Ins = cast<Instruction>(*I);
+
+ // Increment now, so we can remove the use
+ ++I;
+
+ // Find the index of the retval to replace with
+ int index = -1;
+ if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Ins))
+ if (EV->hasIndices())
+ index = *EV->idx_begin();
+
+ // If this use uses a specific return value, and we have a replacement,
+ // replace it.
+ if (index != -1) {
+ Value *New = RetVals[index];
+ if (New) {
+ if (Argument *A = dyn_cast<Argument>(New))
+ // Was an argument returned? Then find the corresponding argument in
+ // the call instruction and use that.
+ New = CS.getArgument(A->getArgNo());
+ Ins->replaceAllUsesWith(New);
+ Ins->eraseFromParent();
+ }
+ }
+ }
+ }
+
+ if (MadeChange) ++NumReturnValProped;
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
new file mode 100644
index 0000000..340b70e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -0,0 +1,84 @@
+//===-- Scalar.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the C bindings for libLLVMIPO.a, which implements
+// several transformations over the LLVM intermediate representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Transforms/IPO.h"
+#include "llvm/PassManager.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+
+void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createArgumentPromotionPass());
+}
+
+void LLVMAddConstantMergePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createConstantMergePass());
+}
+
+void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDeadArgEliminationPass());
+}
+
+void LLVMAddDeadTypeEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDeadTypeEliminationPass());
+}
+
+void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createFunctionAttrsPass());
+}
+
+void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createFunctionInliningPass());
+}
+
+void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGlobalDCEPass());
+}
+
+void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGlobalOptimizerPass());
+}
+
+void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIPConstantPropagationPass());
+}
+
+void LLVMAddLowerSetJmpPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLowerSetJmpPass());
+}
+
+void LLVMAddPruneEHPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPruneEHPass());
+}
+
+void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIPSCCPPass());
+}
+
+void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
+ unwrap(PM)->add(createInternalizePass(AllButMain != 0));
+}
+
+
+void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) {
+ // FIXME: Remove in LLVM 3.0.
+}
+
+void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createStripDeadPrototypesPass());
+}
+
+void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createStripSymbolsPass());
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
new file mode 100644
index 0000000..ecc60ad
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -0,0 +1,80 @@
+//===- InlineAlways.cpp - Code to inline always_inline functions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a custom inliner that handles only functions that
+// are marked as "always inline".
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/CallingConv.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+namespace {
+
+ // AlwaysInliner only inlines functions that are mark as "always inline".
+ class AlwaysInliner : public Inliner {
+ // Functions that are never inlined
+ SmallPtrSet<const Function*, 16> NeverInline;
+ InlineCostAnalyzer CA;
+ public:
+ // Use extremely low threshold.
+ AlwaysInliner() : Inliner(ID, -2000000000) {}
+ static char ID; // Pass identification, replacement for typeid
+ InlineCost getInlineCost(CallSite CS) {
+ return CA.getInlineCost(CS, NeverInline);
+ }
+ float getInlineFudgeFactor(CallSite CS) {
+ return CA.getInlineFudgeFactor(CS);
+ }
+ void resetCachedCostInfo(Function *Caller) {
+ CA.resetCachedCostInfo(Caller);
+ }
+ void growCachedCostInfo(Function* Caller, Function* Callee) {
+ CA.growCachedCostInfo(Caller, Callee);
+ }
+ virtual bool doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG, &NeverInline);
+ }
+ virtual bool doInitialization(CallGraph &CG);
+ void releaseMemory() {
+ CA.clear();
+ }
+ };
+}
+
+char AlwaysInliner::ID = 0;
+INITIALIZE_PASS(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false);
+
+Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
+
+// doInitialization - Initializes the vector of functions that have not
+// been annotated with the "always inline" attribute.
+bool AlwaysInliner::doInitialization(CallGraph &CG) {
+ Module &M = CG.getModule();
+
+ for (Module::iterator I = M.begin(), E = M.end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline))
+ NeverInline.insert(I);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
new file mode 100644
index 0000000..9c6637d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -0,0 +1,111 @@
+//===- InlineSimple.cpp - Code to perform simple function inlining --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bottom-up inlining of functions into callees.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/CallingConv.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+namespace {
+
+ class SimpleInliner : public Inliner {
+ // Functions that are never inlined
+ SmallPtrSet<const Function*, 16> NeverInline;
+ InlineCostAnalyzer CA;
+ public:
+ SimpleInliner() : Inliner(ID) {}
+ SimpleInliner(int Threshold) : Inliner(ID, Threshold) {}
+ static char ID; // Pass identification, replacement for typeid
+ InlineCost getInlineCost(CallSite CS) {
+ return CA.getInlineCost(CS, NeverInline);
+ }
+ float getInlineFudgeFactor(CallSite CS) {
+ return CA.getInlineFudgeFactor(CS);
+ }
+ void resetCachedCostInfo(Function *Caller) {
+ CA.resetCachedCostInfo(Caller);
+ }
+ void growCachedCostInfo(Function* Caller, Function* Callee) {
+ CA.growCachedCostInfo(Caller, Callee);
+ }
+ virtual bool doInitialization(CallGraph &CG);
+ void releaseMemory() {
+ CA.clear();
+ }
+ };
+}
+
+char SimpleInliner::ID = 0;
+INITIALIZE_PASS(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false);
+
+Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
+
+Pass *llvm::createFunctionInliningPass(int Threshold) {
+ return new SimpleInliner(Threshold);
+}
+
+// doInitialization - Initializes the vector of functions that have been
+// annotated with the noinline attribute.
+bool SimpleInliner::doInitialization(CallGraph &CG) {
+
+ Module &M = CG.getModule();
+
+ for (Module::iterator I = M.begin(), E = M.end();
+ I != E; ++I)
+ if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline))
+ NeverInline.insert(I);
+
+ // Get llvm.noinline
+ GlobalVariable *GV = M.getNamedGlobal("llvm.noinline");
+
+ if (GV == 0)
+ return false;
+
+ // Don't crash on invalid code
+ if (!GV->hasDefinitiveInitializer())
+ return false;
+
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+
+ if (InitList == 0)
+ return false;
+
+ // Iterate over each element and add to the NeverInline set
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+
+ // Get Source
+ const Constant *Elt = InitList->getOperand(i);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt))
+ if (CE->getOpcode() == Instruction::BitCast)
+ Elt = CE->getOperand(0);
+
+ // Insert into set of functions to never inline
+ if (const Function *F = dyn_cast<Function>(Elt))
+ NeverInline.insert(F);
+ }
+
+ return false;
+}
+
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
new file mode 100644
index 0000000..4983e8e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -0,0 +1,548 @@
+//===- Inliner.cpp - Code common to all inliners --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mechanics required to implement inlining without
+// missing any calls and updating the call graph. The decisions of which calls
+// are profitable to inline are implemented elsewhere.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
+STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+STATISTIC(NumMergedAllocas, "Number of allocas merged together");
+
+static cl::opt<int>
+InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
+ cl::desc("Control the amount of inlining to perform (default = 225)"));
+
+static cl::opt<int>
+HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
+ cl::desc("Threshold for inlining functions with inline hint"));
+
+// Threshold to use when optsize is specified (and there is no -inline-limit).
+const int OptSizeThreshold = 75;
+
+Inliner::Inliner(char &ID)
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
+
+Inliner::Inliner(char &ID, int Threshold)
+ : CallGraphSCCPass(ID), InlineThreshold(Threshold) {}
+
+/// getAnalysisUsage - For this class, we declare that we require and preserve
+/// the call graph. If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void Inliner::getAnalysisUsage(AnalysisUsage &Info) const {
+ CallGraphSCCPass::getAnalysisUsage(Info);
+}
+
+
+typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> >
+InlinedArrayAllocasTy;
+
+/// InlineCallIfPossible - If it is possible to inline the specified call site,
+/// do so and update the CallGraph for this operation.
+///
+/// This function also does some basic book-keeping to update the IR. The
+/// InlinedArrayAllocas map keeps track of any allocas that are already
+/// available from other functions inlined into the caller. If we are able to
+/// inline this call site we attempt to reuse already available allocas or add
+/// any new allocas to the set if not possible.
+static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
+ InlinedArrayAllocasTy &InlinedArrayAllocas) {
+ Function *Callee = CS.getCalledFunction();
+ Function *Caller = CS.getCaller();
+
+ // Try to inline the function. Get the list of static allocas that were
+ // inlined.
+ if (!InlineFunction(CS, IFI))
+ return false;
+
+ // If the inlined function had a higher stack protection level than the
+ // calling function, then bump up the caller's stack protection level.
+ if (Callee->hasFnAttr(Attribute::StackProtectReq))
+ Caller->addFnAttr(Attribute::StackProtectReq);
+ else if (Callee->hasFnAttr(Attribute::StackProtect) &&
+ !Caller->hasFnAttr(Attribute::StackProtectReq))
+ Caller->addFnAttr(Attribute::StackProtect);
+
+
+ // Look at all of the allocas that we inlined through this call site. If we
+ // have already inlined other allocas through other calls into this function,
+ // then we know that they have disjoint lifetimes and that we can merge them.
+ //
+ // There are many heuristics possible for merging these allocas, and the
+ // different options have different tradeoffs. One thing that we *really*
+ // don't want to hurt is SRoA: once inlining happens, often allocas are no
+ // longer address taken and so they can be promoted.
+ //
+ // Our "solution" for that is to only merge allocas whose outermost type is an
+ // array type. These are usually not promoted because someone is using a
+ // variable index into them. These are also often the most important ones to
+ // merge.
+ //
+ // A better solution would be to have real memory lifetime markers in the IR
+ // and not have the inliner do any merging of allocas at all. This would
+ // allow the backend to do proper stack slot coloring of all allocas that
+ // *actually make it to the backend*, which is really what we want.
+ //
+ // Because we don't have this information, we do this simple and useful hack.
+ //
+ SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+
+ // Loop over all the allocas we have so far and see if they can be merged with
+ // a previously inlined alloca. If not, remember that we had it.
+ for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
+ AllocaNo != e; ++AllocaNo) {
+ AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
+
+ // Don't bother trying to merge array allocations (they will usually be
+ // canonicalized to be an allocation *of* an array), or allocations whose
+ // type is not itself an array (because we're afraid of pessimizing SRoA).
+ const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+ if (ATy == 0 || AI->isArrayAllocation())
+ continue;
+
+ // Get the list of all available allocas for this array type.
+ std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
+
+ // Loop over the allocas in AllocasForType to see if we can reuse one. Note
+ // that we have to be careful not to reuse the same "available" alloca for
+ // multiple different allocas that we just inlined, we use the 'UsedAllocas'
+ // set to keep track of which "available" allocas are being used by this
+ // function. Also, AllocasForType can be empty of course!
+ bool MergedAwayAlloca = false;
+ for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
+ AllocaInst *AvailableAlloca = AllocasForType[i];
+
+ // The available alloca has to be in the right function, not in some other
+ // function in this SCC.
+ if (AvailableAlloca->getParent() != AI->getParent())
+ continue;
+
+ // If the inlined function already uses this alloca then we can't reuse
+ // it.
+ if (!UsedAllocas.insert(AvailableAlloca))
+ continue;
+
+ // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
+ // success!
+ DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI);
+
+ AI->replaceAllUsesWith(AvailableAlloca);
+ AI->eraseFromParent();
+ MergedAwayAlloca = true;
+ ++NumMergedAllocas;
+ break;
+ }
+
+ // If we already nuked the alloca, we're done with it.
+ if (MergedAwayAlloca)
+ continue;
+
+ // If we were unable to merge away the alloca either because there are no
+ // allocas of the right type available or because we reused them all
+ // already, remember that this alloca came from an inlined function and mark
+ // it used so we don't reuse it for other allocas from this inline
+ // operation.
+ AllocasForType.push_back(AI);
+ UsedAllocas.insert(AI);
+ }
+
+ return true;
+}
+
+unsigned Inliner::getInlineThreshold(CallSite CS) const {
+ int thres = InlineThreshold;
+
+ // Listen to optsize when -inline-limit is not given.
+ Function *Caller = CS.getCaller();
+ if (Caller && !Caller->isDeclaration() &&
+ Caller->hasFnAttr(Attribute::OptimizeForSize) &&
+ InlineLimit.getNumOccurrences() == 0)
+ thres = OptSizeThreshold;
+
+ // Listen to inlinehint when it would increase the threshold.
+ Function *Callee = CS.getCalledFunction();
+ if (HintThreshold > thres && Callee && !Callee->isDeclaration() &&
+ Callee->hasFnAttr(Attribute::InlineHint))
+ thres = HintThreshold;
+
+ return thres;
+}
+
+/// shouldInline - Return true if the inliner should attempt to inline
+/// at the given CallSite.
+bool Inliner::shouldInline(CallSite CS) {
+ InlineCost IC = getInlineCost(CS);
+
+ if (IC.isAlways()) {
+ DEBUG(dbgs() << " Inlining: cost=always"
+ << ", Call: " << *CS.getInstruction() << "\n");
+ return true;
+ }
+
+ if (IC.isNever()) {
+ DEBUG(dbgs() << " NOT Inlining: cost=never"
+ << ", Call: " << *CS.getInstruction() << "\n");
+ return false;
+ }
+
+ int Cost = IC.getValue();
+ Function *Caller = CS.getCaller();
+ int CurrentThreshold = getInlineThreshold(CS);
+ float FudgeFactor = getInlineFudgeFactor(CS);
+ int AdjThreshold = (int)(CurrentThreshold * FudgeFactor);
+ if (Cost >= AdjThreshold) {
+ DEBUG(dbgs() << " NOT Inlining: cost=" << Cost
+ << ", thres=" << AdjThreshold
+ << ", Call: " << *CS.getInstruction() << "\n");
+ return false;
+ }
+
+ // Try to detect the case where the current inlining candidate caller
+ // (call it B) is a static function and is an inlining candidate elsewhere,
+ // and the current candidate callee (call it C) is large enough that
+ // inlining it into B would make B too big to inline later. In these
+ // circumstances it may be best not to inline C into B, but to inline B
+ // into its callers.
+ if (Caller->hasLocalLinkage()) {
+ int TotalSecondaryCost = 0;
+ bool outerCallsFound = false;
+ bool allOuterCallsWillBeInlined = true;
+ bool someOuterCallWouldNotBeInlined = false;
+ for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
+ I != E; ++I) {
+ CallSite CS2(*I);
+
+ // If this isn't a call to Caller (it could be some other sort
+ // of reference) skip it.
+ if (!CS2 || CS2.getCalledFunction() != Caller)
+ continue;
+
+ InlineCost IC2 = getInlineCost(CS2);
+ if (IC2.isNever())
+ allOuterCallsWillBeInlined = false;
+ if (IC2.isAlways() || IC2.isNever())
+ continue;
+
+ outerCallsFound = true;
+ int Cost2 = IC2.getValue();
+ int CurrentThreshold2 = getInlineThreshold(CS2);
+ float FudgeFactor2 = getInlineFudgeFactor(CS2);
+
+ if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
+ allOuterCallsWillBeInlined = false;
+
+ // See if we have this case. We subtract off the penalty
+ // for the call instruction, which we would be deleting.
+ if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
+ Cost2 + Cost - (InlineConstants::CallPenalty + 1) >=
+ (int)(CurrentThreshold2 * FudgeFactor2)) {
+ someOuterCallWouldNotBeInlined = true;
+ TotalSecondaryCost += Cost2;
+ }
+ }
+ // If all outer calls to Caller would get inlined, the cost for the last
+ // one is set very low by getInlineCost, in anticipation that Caller will
+ // be removed entirely. We did not account for this above unless there
+ // is only one caller of Caller.
+ if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end())
+ TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
+
+ if (outerCallsFound && someOuterCallWouldNotBeInlined &&
+ TotalSecondaryCost < Cost) {
+ DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
+ " Cost = " << Cost <<
+ ", outer Cost = " << TotalSecondaryCost << '\n');
+ return false;
+ }
+ }
+
+ DEBUG(dbgs() << " Inlining: cost=" << Cost
+ << ", thres=" << AdjThreshold
+ << ", Call: " << *CS.getInstruction() << '\n');
+ return true;
+}
+
+/// InlineHistoryIncludes - Return true if the specified inline history ID
+/// indicates an inline history that includes the specified function.
+static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
+ while (InlineHistoryID != -1) {
+ assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+ "Invalid inline history ID");
+ if (InlineHistory[InlineHistoryID].first == F)
+ return true;
+ InlineHistoryID = InlineHistory[InlineHistoryID].second;
+ }
+ return false;
+}
+
+
+bool Inliner::runOnSCC(CallGraphSCC &SCC) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+ SmallPtrSet<Function*, 8> SCCFunctions;
+ DEBUG(dbgs() << "Inliner visiting SCC:");
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F) SCCFunctions.insert(F);
+ DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
+ }
+
+ // Scan through and identify all call sites ahead of time so that we only
+ // inline call sites in the original functions, not call sites that result
+ // from inlining other functions.
+ SmallVector<std::pair<CallSite, int>, 16> CallSites;
+
+ // When inlining a callee produces new call sites, we want to keep track of
+ // the fact that they were inlined from the callee. This allows us to avoid
+ // infinite inlining in some obscure cases. To represent this, we use an
+ // index into the InlineHistory vector.
+ SmallVector<std::pair<Function*, int>, 8> InlineHistory;
+
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (!F) continue;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ CallSite CS(cast<Value>(I));
+ // If this isn't a call, or it is a call to an intrinsic, it can
+ // never be inlined.
+ if (!CS || isa<IntrinsicInst>(I))
+ continue;
+
+ // If this is a direct call to an external function, we can never inline
+ // it. If it is an indirect call, inlining may resolve it to be a
+ // direct call, so we keep it.
+ if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
+ continue;
+
+ CallSites.push_back(std::make_pair(CS, -1));
+ }
+ }
+
+ DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
+
+ // If there are no calls in this function, exit early.
+ if (CallSites.empty())
+ return false;
+
+ // Now that we have all of the call sites, move the ones to functions in the
+ // current SCC to the end of the list.
+ unsigned FirstCallInSCC = CallSites.size();
+ for (unsigned i = 0; i < FirstCallInSCC; ++i)
+ if (Function *F = CallSites[i].first.getCalledFunction())
+ if (SCCFunctions.count(F))
+ std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
+
+
+ InlinedArrayAllocasTy InlinedArrayAllocas;
+ InlineFunctionInfo InlineInfo(&CG, TD);
+
+ // Now that we have all of the call sites, loop over them and inline them if
+ // it looks profitable to do so.
+ bool Changed = false;
+ bool LocalChange;
+ do {
+ LocalChange = false;
+ // Iterate over the outer loop because inlining functions can cause indirect
+ // calls to become direct calls.
+ for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
+ CallSite CS = CallSites[CSi].first;
+
+ Function *Caller = CS.getCaller();
+ Function *Callee = CS.getCalledFunction();
+
+ // If this call site is dead and it is to a readonly function, we should
+ // just delete the call instead of trying to inline it, regardless of
+ // size. This happens because IPSCCP propagates the result out of the
+ // call and then we're left with the dead call.
+ if (isInstructionTriviallyDead(CS.getInstruction())) {
+ DEBUG(dbgs() << " -> Deleting dead call: "
+ << *CS.getInstruction() << "\n");
+ // Update the call graph by deleting the edge from Callee to Caller.
+ CG[Caller]->removeCallEdgeFor(CS);
+ CS.getInstruction()->eraseFromParent();
+ ++NumCallsDeleted;
+ // Update the cached cost info with the missing call
+ growCachedCostInfo(Caller, NULL);
+ } else {
+ // We can only inline direct calls to non-declarations.
+ if (Callee == 0 || Callee->isDeclaration()) continue;
+
+ // If this call site was obtained by inlining another function, verify
+ // that the include path for the function did not include the callee
+ // itself. If so, we'd be recursively inlinling the same function,
+ // which would provide the same callsites, which would cause us to
+ // infinitely inline.
+ int InlineHistoryID = CallSites[CSi].second;
+ if (InlineHistoryID != -1 &&
+ InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
+ continue;
+
+
+ // If the policy determines that we should inline this function,
+ // try to do so.
+ if (!shouldInline(CS))
+ continue;
+
+ // Attempt to inline the function.
+ if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas))
+ continue;
+ ++NumInlined;
+
+ // If inlining this function gave us any new call sites, throw them
+ // onto our worklist to process. They are useful inline candidates.
+ if (!InlineInfo.InlinedCalls.empty()) {
+ // Create a new inline history entry for this, so that we remember
+ // that these new callsites came about due to inlining Callee.
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
+
+ for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
+ i != e; ++i) {
+ Value *Ptr = InlineInfo.InlinedCalls[i];
+ CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+ }
+ }
+
+ // Update the cached cost info with the inlined call.
+ growCachedCostInfo(Caller, Callee);
+ }
+
+ // If we inlined or deleted the last possible call site to the function,
+ // delete the function body now.
+ if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
+ // TODO: Can remove if in SCC now.
+ !SCCFunctions.count(Callee) &&
+
+ // The function may be apparently dead, but if there are indirect
+ // callgraph references to the node, we cannot delete it yet, this
+ // could invalidate the CGSCC iterator.
+ CG[Callee]->getNumReferences() == 0) {
+ DEBUG(dbgs() << " -> Deleting dead function: "
+ << Callee->getName() << "\n");
+ CallGraphNode *CalleeNode = CG[Callee];
+
+ // Remove any call graph edges from the callee to its callees.
+ CalleeNode->removeAllCalledFunctions();
+
+ resetCachedCostInfo(Callee);
+
+ // Removing the node for callee from the call graph and delete it.
+ delete CG.removeFunctionFromModule(CalleeNode);
+ ++NumDeleted;
+ }
+
+ // Remove this call site from the list. If possible, use
+ // swap/pop_back for efficiency, but do not use it if doing so would
+ // move a call site to a function in this SCC before the
+ // 'FirstCallInSCC' barrier.
+ if (SCC.isSingular()) {
+ CallSites[CSi] = CallSites.back();
+ CallSites.pop_back();
+ } else {
+ CallSites.erase(CallSites.begin()+CSi);
+ }
+ --CSi;
+
+ Changed = true;
+ LocalChange = true;
+ }
+ } while (LocalChange);
+
+ return Changed;
+}
+
+// doFinalization - Remove now-dead linkonce functions at the end of
+// processing to avoid breaking the SCC traversal.
+bool Inliner::doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG);
+}
+
+/// removeDeadFunctions - Remove dead functions that are not included in
+/// DNR (Do Not Remove) list.
+bool Inliner::removeDeadFunctions(CallGraph &CG,
+ SmallPtrSet<const Function *, 16> *DNR) {
+ SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove;
+
+ // Scan for all of the functions, looking for ones that should now be removed
+ // from the program. Insert the dead ones in the FunctionsToRemove set.
+ for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
+ CallGraphNode *CGN = I->second;
+ if (CGN->getFunction() == 0)
+ continue;
+
+ Function *F = CGN->getFunction();
+
+ // If the only remaining users of the function are dead constants, remove
+ // them.
+ F->removeDeadConstantUsers();
+
+ if (DNR && DNR->count(F))
+ continue;
+ if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
+ !F->hasAvailableExternallyLinkage())
+ continue;
+ if (!F->use_empty())
+ continue;
+
+ // Remove any call graph edges from the function to its callees.
+ CGN->removeAllCalledFunctions();
+
+ // Remove any edges from the external node to the function's call graph
+ // node. These edges might have been made irrelegant due to
+ // optimization of the program.
+ CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+
+ // Removing the node for callee from the call graph and delete it.
+ FunctionsToRemove.insert(CGN);
+ }
+
+ // Now that we know which functions to delete, do so. We didn't want to do
+ // this inline, because that would invalidate our CallGraph::iterator
+ // objects. :(
+ //
+ // Note that it doesn't matter that we are iterating over a non-stable set
+ // here to do this, it doesn't matter which order the functions are deleted
+ // in.
+ bool Changed = false;
+ for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(),
+ E = FunctionsToRemove.end(); I != E; ++I) {
+ resetCachedCostInfo((*I)->getFunction());
+ delete CG.removeFunctionFromModule(*I);
+ ++NumDeleted;
+ Changed = true;
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
new file mode 100644
index 0000000..a1d919f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -0,0 +1,190 @@
+//===-- Internalize.cpp - Mark functions internal -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions in the input module, looking for a
+// main function. If a main function is found, all other functions and all
+// global variables with initializers are marked as internal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "internalize"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include <fstream>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumAliases , "Number of aliases internalized");
+STATISTIC(NumFunctions, "Number of functions internalized");
+STATISTIC(NumGlobals , "Number of global vars internalized");
+
+// APIFile - A file which contains a list of symbols that should not be marked
+// external.
+static cl::opt<std::string>
+APIFile("internalize-public-api-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of symbol names to preserve"));
+
+// APIList - A list of symbols that should not be marked internal.
+static cl::list<std::string>
+APIList("internalize-public-api-list", cl::value_desc("list"),
+ cl::desc("A list of symbol names to preserve"),
+ cl::CommaSeparated);
+
+namespace {
+ class InternalizePass : public ModulePass {
+ std::set<std::string> ExternalNames;
+ /// If no api symbols were specified and a main function is defined,
+ /// assume the main function is the only API
+ bool AllButMain;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit InternalizePass(bool AllButMain = true);
+ explicit InternalizePass(const std::vector <const char *>& exportList);
+ void LoadFile(const char *Filename);
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<CallGraph>();
+ }
+ };
+} // end anonymous namespace
+
+char InternalizePass::ID = 0;
+INITIALIZE_PASS(InternalizePass, "internalize",
+ "Internalize Global Symbols", false, false);
+
+InternalizePass::InternalizePass(bool AllButMain)
+ : ModulePass(ID), AllButMain(AllButMain){
+ if (!APIFile.empty()) // If a filename is specified, use it.
+ LoadFile(APIFile.c_str());
+ if (!APIList.empty()) // If a list is specified, use it as well.
+ ExternalNames.insert(APIList.begin(), APIList.end());
+}
+
+InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
+ : ModulePass(ID), AllButMain(false){
+ for(std::vector<const char *>::const_iterator itr = exportList.begin();
+ itr != exportList.end(); itr++) {
+ ExternalNames.insert(*itr);
+ }
+}
+
+void InternalizePass::LoadFile(const char *Filename) {
+ // Load the APIFile...
+ std::ifstream In(Filename);
+ if (!In.good()) {
+ errs() << "WARNING: Internalize couldn't load file '" << Filename
+ << "'! Continuing as if it's empty.\n";
+ return; // Just continue as if the file were empty
+ }
+ while (In) {
+ std::string Symbol;
+ In >> Symbol;
+ if (!Symbol.empty())
+ ExternalNames.insert(Symbol);
+ }
+}
+
+bool InternalizePass::runOnModule(Module &M) {
+ CallGraph *CG = getAnalysisIfAvailable<CallGraph>();
+ CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
+
+ if (ExternalNames.empty()) {
+ // Return if we're not in 'all but main' mode and have no external api
+ if (!AllButMain)
+ return false;
+ // If no list or file of symbols was specified, check to see if there is a
+ // "main" symbol defined in the module. If so, use it, otherwise do not
+ // internalize the module, it must be a library or something.
+ //
+ Function *MainFunc = M.getFunction("main");
+ if (MainFunc == 0 || MainFunc->isDeclaration())
+ return false; // No main found, must be a library...
+
+ // Preserve main, internalize all else.
+ ExternalNames.insert(MainFunc->getName());
+ }
+
+ bool Changed = false;
+
+ // Mark all functions not in the api as internal.
+ // FIXME: maybe use private linkage?
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && // Function must be defined here
+ !I->hasLocalLinkage() && // Can't already have internal linkage
+ !ExternalNames.count(I->getName())) {// Not marked to keep external?
+ I->setLinkage(GlobalValue::InternalLinkage);
+ // Remove a callgraph edge from the external node to this function.
+ if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
+ Changed = true;
+ ++NumFunctions;
+ DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
+ }
+
+ // Never internalize the llvm.used symbol. It is used to implement
+ // attribute((used)).
+ // FIXME: Shouldn't this just filter on llvm.metadata section??
+ ExternalNames.insert("llvm.used");
+ ExternalNames.insert("llvm.compiler.used");
+
+ // Never internalize anchors used by the machine module info, else the info
+ // won't find them. (see MachineModuleInfo.)
+ ExternalNames.insert("llvm.dbg.compile_units");
+ ExternalNames.insert("llvm.dbg.global_variables");
+ ExternalNames.insert("llvm.dbg.subprograms");
+ ExternalNames.insert("llvm.global_ctors");
+ ExternalNames.insert("llvm.global_dtors");
+ ExternalNames.insert("llvm.noinline");
+ ExternalNames.insert("llvm.global.annotations");
+
+ // Mark all global variables with initializers that are not in the api as
+ // internal as well.
+ // FIXME: maybe use private linkage?
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasLocalLinkage() &&
+ // Available externally is really just a "declaration with a body".
+ !I->hasAvailableExternallyLinkage() &&
+ !ExternalNames.count(I->getName())) {
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumGlobals;
+ DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
+ }
+
+ // Mark all aliases that are not in the api as internal as well.
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I)
+ if (!I->isDeclaration() && !I->hasInternalLinkage() &&
+ // Available externally is really just a "declaration with a body".
+ !I->hasAvailableExternallyLinkage() &&
+ !ExternalNames.count(I->getName())) {
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumAliases;
+ DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
+ }
+
+ return Changed;
+}
+
+ModulePass *llvm::createInternalizePass(bool AllButMain) {
+ return new InternalizePass(AllButMain);
+}
+
+ModulePass *llvm::createInternalizePass(const std::vector <const char *> &el) {
+ return new InternalizePass(el);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
new file mode 100644
index 0000000..f88dff6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -0,0 +1,241 @@
+//===- LoopExtractor.cpp - Extract each loop into a new function ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass wrapper around the ExtractLoop() scalar transformation to extract each
+// top-level loop into its own new function. If the loop is the ONLY loop in a
+// given function, it is not touched. This is a pass most useful for debugging
+// via bugpoint.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-extract"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include <fstream>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumExtracted, "Number of loops extracted");
+
+namespace {
+ struct LoopExtractor : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ unsigned NumLoops;
+
+ explicit LoopExtractor(unsigned numLoops = ~0)
+ : LoopPass(ID), NumLoops(numLoops) {}
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<DominatorTree>();
+ }
+ };
+}
+
+char LoopExtractor::ID = 0;
+INITIALIZE_PASS(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false);
+
+namespace {
+ /// SingleLoopExtractor - For bugpoint.
+ struct SingleLoopExtractor : public LoopExtractor {
+ static char ID; // Pass identification, replacement for typeid
+ SingleLoopExtractor() : LoopExtractor(1) {}
+ };
+} // End anonymous namespace
+
+char SingleLoopExtractor::ID = 0;
+INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
+ "Extract at most one loop into a new function", false, false);
+
+// createLoopExtractorPass - This pass extracts all natural loops from the
+// program into a function if it can.
+//
+Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
+
+bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
+ // Only visit top-level loops.
+ if (L->getParentLoop())
+ return false;
+
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ bool Changed = false;
+
+ // If there is more than one top-level loop in this function, extract all of
+ // the loops. Otherwise there is exactly one top-level loop; in this case if
+ // this function is more than a minimal wrapper around the loop, extract
+ // the loop.
+ bool ShouldExtractLoop = false;
+
+ // Extract the loop if the entry block doesn't branch to the loop header.
+ TerminatorInst *EntryTI =
+ L->getHeader()->getParent()->getEntryBlock().getTerminator();
+ if (!isa<BranchInst>(EntryTI) ||
+ !cast<BranchInst>(EntryTI)->isUnconditional() ||
+ EntryTI->getSuccessor(0) != L->getHeader())
+ ShouldExtractLoop = true;
+ else {
+ // Check to see if any exits from the loop are more than just return
+ // blocks.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) {
+ ShouldExtractLoop = true;
+ break;
+ }
+ }
+ if (ShouldExtractLoop) {
+ if (NumLoops == 0) return Changed;
+ --NumLoops;
+ if (ExtractLoop(DT, L) != 0) {
+ Changed = true;
+ // After extraction, the loop is replaced by a function call, so
+ // we shouldn't try to run any more loop passes on it.
+ LPM.deleteLoopFromQueue(L);
+ }
+ ++NumExtracted;
+ }
+
+ return Changed;
+}
+
+// createSingleLoopExtractorPass - This pass extracts one natural loop from the
+// program into a function if it can. This is used by bugpoint.
+//
+Pass *llvm::createSingleLoopExtractorPass() {
+ return new SingleLoopExtractor();
+}
+
+
+// BlockFile - A file which contains a list of blocks that should not be
+// extracted.
+static cl::opt<std::string>
+BlockFile("extract-blocks-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of basic blocks to not extract"),
+ cl::Hidden);
+
+namespace {
+ /// BlockExtractorPass - This pass is used by bugpoint to extract all blocks
+ /// from the module into their own functions except for those specified by the
+ /// BlocksToNotExtract list.
+ class BlockExtractorPass : public ModulePass {
+ void LoadFile(const char *Filename);
+
+ std::vector<BasicBlock*> BlocksToNotExtract;
+ std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ BlockExtractorPass() : ModulePass(ID) {
+ if (!BlockFile.empty())
+ LoadFile(BlockFile.c_str());
+ }
+
+ bool runOnModule(Module &M);
+ };
+}
+
+char BlockExtractorPass::ID = 0;
+INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
+ "Extract Basic Blocks From Module (for bugpoint use)",
+ false, false);
+
+// createBlockExtractorPass - This pass extracts all blocks (except those
+// specified in the argument list) from the functions in the module.
+//
+ModulePass *llvm::createBlockExtractorPass()
+{
+ return new BlockExtractorPass();
+}
+
+void BlockExtractorPass::LoadFile(const char *Filename) {
+ // Load the BlockFile...
+ std::ifstream In(Filename);
+ if (!In.good()) {
+ errs() << "WARNING: BlockExtractor couldn't load file '" << Filename
+ << "'!\n";
+ return;
+ }
+ while (In) {
+ std::string FunctionName, BlockName;
+ In >> FunctionName;
+ In >> BlockName;
+ if (!BlockName.empty())
+ BlocksToNotExtractByName.push_back(
+ std::make_pair(FunctionName, BlockName));
+ }
+}
+
+bool BlockExtractorPass::runOnModule(Module &M) {
+ std::set<BasicBlock*> TranslatedBlocksToNotExtract;
+ for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) {
+ BasicBlock *BB = BlocksToNotExtract[i];
+ Function *F = BB->getParent();
+
+ // Map the corresponding function in this module.
+ Function *MF = M.getFunction(F->getName());
+ assert(MF->getFunctionType() == F->getFunctionType() && "Wrong function?");
+
+ // Figure out which index the basic block is in its function.
+ Function::iterator BBI = MF->begin();
+ std::advance(BBI, std::distance(F->begin(), Function::iterator(BB)));
+ TranslatedBlocksToNotExtract.insert(BBI);
+ }
+
+ while (!BlocksToNotExtractByName.empty()) {
+ // There's no way to find BBs by name without looking at every BB inside
+ // every Function. Fortunately, this is always empty except when used by
+ // bugpoint in which case correctness is more important than performance.
+
+ std::string &FuncName = BlocksToNotExtractByName.back().first;
+ std::string &BlockName = BlocksToNotExtractByName.back().second;
+
+ for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
+ Function &F = *FI;
+ if (F.getName() != FuncName) continue;
+
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ BasicBlock &BB = *BI;
+ if (BB.getName() != BlockName) continue;
+
+ TranslatedBlocksToNotExtract.insert(BI);
+ }
+ }
+
+ BlocksToNotExtractByName.pop_back();
+ }
+
+ // Now that we know which blocks to not extract, figure out which ones we WANT
+ // to extract.
+ std::vector<BasicBlock*> BlocksToExtract;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (!TranslatedBlocksToNotExtract.count(BB))
+ BlocksToExtract.push_back(BB);
+
+ for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i)
+ ExtractBasicBlock(BlocksToExtract[i]);
+
+ return !BlocksToExtract.empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
new file mode 100644
index 0000000..6c715de
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -0,0 +1,545 @@
+//===- LowerSetJmp.cpp - Code pertaining to lowering set/long jumps -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lowering of setjmp and longjmp to use the
+// LLVM invoke and unwind instructions as necessary.
+//
+// Lowering of longjmp is fairly trivial. We replace the call with a
+// call to the LLVM library function "__llvm_sjljeh_throw_longjmp()".
+// This unwinds the stack for us calling all of the destructors for
+// objects allocated on the stack.
+//
+// At a setjmp call, the basic block is split and the setjmp removed.
+// The calls in a function that have a setjmp are converted to invoke
+// where the except part checks to see if it's a longjmp exception and,
+// if so, if it's handled in the function. If it is, then it gets the
+// value returned by the longjmp and goes to where the basic block was
+// split. Invoke instructions are handled in a similar fashion with the
+// original except block being executed if it isn't a longjmp except
+// that is handled by that function.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FIXME: This pass doesn't deal with PHI statements just yet. That is,
+// we expect this to occur before SSAification is done. This would seem
+// to make sense, but in general, it might be a good idea to make this
+// pass invokable via the "opt" command at will.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowersetjmp"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(LongJmpsTransformed, "Number of longjmps transformed");
+STATISTIC(SetJmpsTransformed , "Number of setjmps transformed");
+STATISTIC(CallsTransformed , "Number of calls invokified");
+STATISTIC(InvokesTransformed , "Number of invokes modified");
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ // LowerSetJmp pass implementation.
+ class LowerSetJmp : public ModulePass, public InstVisitor<LowerSetJmp> {
+ // LLVM library functions...
+ Constant *InitSJMap; // __llvm_sjljeh_init_setjmpmap
+ Constant *DestroySJMap; // __llvm_sjljeh_destroy_setjmpmap
+ Constant *AddSJToMap; // __llvm_sjljeh_add_setjmp_to_map
+ Constant *ThrowLongJmp; // __llvm_sjljeh_throw_longjmp
+ Constant *TryCatchLJ; // __llvm_sjljeh_try_catching_longjmp_exception
+ Constant *IsLJException; // __llvm_sjljeh_is_longjmp_exception
+ Constant *GetLJValue; // __llvm_sjljeh_get_longjmp_value
+
+ typedef std::pair<SwitchInst*, CallInst*> SwitchValuePair;
+
+ // Keep track of those basic blocks reachable via a depth-first search of
+ // the CFG from a setjmp call. We only need to transform those "call" and
+ // "invoke" instructions that are reachable from the setjmp call site.
+ std::set<BasicBlock*> DFSBlocks;
+
+ // The setjmp map is going to hold information about which setjmps
+ // were called (each setjmp gets its own number) and with which
+ // buffer it was called.
+ std::map<Function*, AllocaInst*> SJMap;
+
+ // The rethrow basic block map holds the basic block to branch to if
+ // the exception isn't handled in the current function and needs to
+ // be rethrown.
+ std::map<const Function*, BasicBlock*> RethrowBBMap;
+
+ // The preliminary basic block map holds a basic block that grabs the
+ // exception and determines if it's handled by the current function.
+ std::map<const Function*, BasicBlock*> PrelimBBMap;
+
+ // The switch/value map holds a switch inst/call inst pair. The
+ // switch inst controls which handler (if any) gets called and the
+ // value is the value returned to that handler by the call to
+ // __llvm_sjljeh_get_longjmp_value.
+ std::map<const Function*, SwitchValuePair> SwitchValMap;
+
+ // A map of which setjmps we've seen so far in a function.
+ std::map<const Function*, unsigned> SetJmpIDMap;
+
+ AllocaInst* GetSetJmpMap(Function* Func);
+ BasicBlock* GetRethrowBB(Function* Func);
+ SwitchValuePair GetSJSwitch(Function* Func, BasicBlock* Rethrow);
+
+ void TransformLongJmpCall(CallInst* Inst);
+ void TransformSetJmpCall(CallInst* Inst);
+
+ bool IsTransformableFunction(StringRef Name);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerSetJmp() : ModulePass(ID) {}
+
+ void visitCallInst(CallInst& CI);
+ void visitInvokeInst(InvokeInst& II);
+ void visitReturnInst(ReturnInst& RI);
+ void visitUnwindInst(UnwindInst& UI);
+
+ bool runOnModule(Module& M);
+ bool doInitialization(Module& M);
+ };
+} // end anonymous namespace
+
+char LowerSetJmp::ID = 0;
+INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false);
+
+// run - Run the transformation on the program. We grab the function
+// prototypes for longjmp and setjmp. If they are used in the program,
+// then we can go directly to the places they're at and transform them.
+bool LowerSetJmp::runOnModule(Module& M) {
+ bool Changed = false;
+
+ // These are what the functions are called.
+ Function* SetJmp = M.getFunction("llvm.setjmp");
+ Function* LongJmp = M.getFunction("llvm.longjmp");
+
+ // This program doesn't have longjmp and setjmp calls.
+ if ((!LongJmp || LongJmp->use_empty()) &&
+ (!SetJmp || SetJmp->use_empty())) return false;
+
+ // Initialize some values and functions we'll need to transform the
+ // setjmp/longjmp functions.
+ doInitialization(M);
+
+ if (SetJmp) {
+ for (Value::use_iterator B = SetJmp->use_begin(), E = SetJmp->use_end();
+ B != E; ++B) {
+ BasicBlock* BB = cast<Instruction>(*B)->getParent();
+ for (df_ext_iterator<BasicBlock*> I = df_ext_begin(BB, DFSBlocks),
+ E = df_ext_end(BB, DFSBlocks); I != E; ++I)
+ /* empty */;
+ }
+
+ while (!SetJmp->use_empty()) {
+ assert(isa<CallInst>(SetJmp->use_back()) &&
+ "User of setjmp intrinsic not a call?");
+ TransformSetJmpCall(cast<CallInst>(SetJmp->use_back()));
+ Changed = true;
+ }
+ }
+
+ if (LongJmp)
+ while (!LongJmp->use_empty()) {
+ assert(isa<CallInst>(LongJmp->use_back()) &&
+ "User of longjmp intrinsic not a call?");
+ TransformLongJmpCall(cast<CallInst>(LongJmp->use_back()));
+ Changed = true;
+ }
+
+ // Now go through the affected functions and convert calls and invokes
+ // to new invokes...
+ for (std::map<Function*, AllocaInst*>::iterator
+ B = SJMap.begin(), E = SJMap.end(); B != E; ++B) {
+ Function* F = B->first;
+ for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB)
+ for (BasicBlock::iterator IB = BB->begin(), IE = BB->end(); IB != IE; ) {
+ visit(*IB++);
+ if (IB != BB->end() && IB->getParent() != BB)
+ break; // The next instruction got moved to a different block!
+ }
+ }
+
+ DFSBlocks.clear();
+ SJMap.clear();
+ RethrowBBMap.clear();
+ PrelimBBMap.clear();
+ SwitchValMap.clear();
+ SetJmpIDMap.clear();
+
+ return Changed;
+}
+
+// doInitialization - For the lower long/setjmp pass, this ensures that a
+// module contains a declaration for the intrisic functions we are going
+// to call to convert longjmp and setjmp calls.
+//
+// This function is always successful, unless it isn't.
+bool LowerSetJmp::doInitialization(Module& M)
+{
+ const Type *SBPTy = Type::getInt8PtrTy(M.getContext());
+ const Type *SBPPTy = PointerType::getUnqual(SBPTy);
+
+ // N.B. See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for
+ // a description of the following library functions.
+
+ // void __llvm_sjljeh_init_setjmpmap(void**)
+ InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap",
+ Type::getVoidTy(M.getContext()),
+ SBPPTy, (Type *)0);
+ // void __llvm_sjljeh_destroy_setjmpmap(void**)
+ DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap",
+ Type::getVoidTy(M.getContext()),
+ SBPPTy, (Type *)0);
+
+ // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned)
+ AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map",
+ Type::getVoidTy(M.getContext()),
+ SBPPTy, SBPTy,
+ Type::getInt32Ty(M.getContext()),
+ (Type *)0);
+
+ // void __llvm_sjljeh_throw_longjmp(int*, int)
+ ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp",
+ Type::getVoidTy(M.getContext()), SBPTy,
+ Type::getInt32Ty(M.getContext()),
+ (Type *)0);
+
+ // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **)
+ TryCatchLJ =
+ M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception",
+ Type::getInt32Ty(M.getContext()), SBPPTy, (Type *)0);
+
+ // bool __llvm_sjljeh_is_longjmp_exception()
+ IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception",
+ Type::getInt1Ty(M.getContext()),
+ (Type *)0);
+
+ // int __llvm_sjljeh_get_longjmp_value()
+ GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value",
+ Type::getInt32Ty(M.getContext()),
+ (Type *)0);
+ return true;
+}
+
+// IsTransformableFunction - Return true if the function name isn't one
+// of the ones we don't want transformed. Currently, don't transform any
+// "llvm.{setjmp,longjmp}" functions and none of the setjmp/longjmp error
+// handling functions (beginning with __llvm_sjljeh_...they don't throw
+// exceptions).
+bool LowerSetJmp::IsTransformableFunction(StringRef Name) {
+ return !Name.startswith("__llvm_sjljeh_");
+}
+
+// TransformLongJmpCall - Transform a longjmp call into a call to the
+// internal __llvm_sjljeh_throw_longjmp function. It then takes care of
+// throwing the exception for us.
+void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
+{
+ const Type* SBPTy = Type::getInt8PtrTy(Inst->getContext());
+
+ // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the
+ // same parameters as "longjmp", except that the buffer is cast to a
+ // char*. It returns "void", so it doesn't need to replace any of
+ // Inst's uses and doesn't get a name.
+ CastInst* CI =
+ new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst);
+ Value *Args[] = { CI, Inst->getArgOperand(1) };
+ CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst);
+
+ SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()];
+
+ // If the function has a setjmp call in it (they are transformed first)
+ // we should branch to the basic block that determines if this longjmp
+ // is applicable here. Otherwise, issue an unwind.
+ if (SVP.first)
+ BranchInst::Create(SVP.first->getParent(), Inst);
+ else
+ new UnwindInst(Inst->getContext(), Inst);
+
+ // Remove all insts after the branch/unwind inst. Go from back to front to
+ // avoid replaceAllUsesWith if possible.
+ BasicBlock *BB = Inst->getParent();
+ Instruction *Removed;
+ do {
+ Removed = &BB->back();
+ // If the removed instructions have any users, replace them now.
+ if (!Removed->use_empty())
+ Removed->replaceAllUsesWith(UndefValue::get(Removed->getType()));
+ Removed->eraseFromParent();
+ } while (Removed != Inst);
+
+ ++LongJmpsTransformed;
+}
+
+// GetSetJmpMap - Retrieve (create and initialize, if necessary) the
+// setjmp map. This map is going to hold information about which setjmps
+// were called (each setjmp gets its own number) and with which buffer it
+// was called. There can be only one!
+AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func)
+{
+ if (SJMap[Func]) return SJMap[Func];
+
+ // Insert the setjmp map initialization before the first instruction in
+ // the function.
+ Instruction* Inst = Func->getEntryBlock().begin();
+ assert(Inst && "Couldn't find even ONE instruction in entry block!");
+
+ // Fill in the alloca and call to initialize the SJ map.
+ const Type *SBPTy =
+ Type::getInt8PtrTy(Func->getContext());
+ AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst);
+ CallInst::Create(InitSJMap, Map, "", Inst);
+ return SJMap[Func] = Map;
+}
+
+// GetRethrowBB - Only one rethrow basic block is needed per function.
+// If this is a longjmp exception but not handled in this block, this BB
+// performs the rethrow.
+BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func)
+{
+ if (RethrowBBMap[Func]) return RethrowBBMap[Func];
+
+ // The basic block we're going to jump to if we need to rethrow the
+ // exception.
+ BasicBlock* Rethrow =
+ BasicBlock::Create(Func->getContext(), "RethrowExcept", Func);
+
+ // Fill in the "Rethrow" BB with a call to rethrow the exception. This
+ // is the last instruction in the BB since at this point the runtime
+ // should exit this function and go to the next function.
+ new UnwindInst(Func->getContext(), Rethrow);
+ return RethrowBBMap[Func] = Rethrow;
+}
+
+// GetSJSwitch - Return the switch statement that controls which handler
+// (if any) gets called and the value returned to that handler.
+LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func,
+ BasicBlock* Rethrow)
+{
+ if (SwitchValMap[Func].first) return SwitchValMap[Func];
+
+ BasicBlock* LongJmpPre =
+ BasicBlock::Create(Func->getContext(), "LongJmpBlkPre", Func);
+
+ // Keep track of the preliminary basic block for some of the other
+ // transformations.
+ PrelimBBMap[Func] = LongJmpPre;
+
+ // Grab the exception.
+ CallInst* Cond = CallInst::Create(IsLJException, "IsLJExcept", LongJmpPre);
+
+ // The "decision basic block" gets the number associated with the
+ // setjmp call returning to switch on and the value returned by
+ // longjmp.
+ BasicBlock* DecisionBB =
+ BasicBlock::Create(Func->getContext(), "LJDecisionBB", Func);
+
+ BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre);
+
+ // Fill in the "decision" basic block.
+ CallInst* LJVal = CallInst::Create(GetLJValue, "LJVal", DecisionBB);
+ CallInst* SJNum = CallInst::Create(TryCatchLJ, GetSetJmpMap(Func), "SJNum",
+ DecisionBB);
+
+ SwitchInst* SI = SwitchInst::Create(SJNum, Rethrow, 0, DecisionBB);
+ return SwitchValMap[Func] = SwitchValuePair(SI, LJVal);
+}
+
+// TransformSetJmpCall - The setjmp call is a bit trickier to transform.
+// We're going to convert all setjmp calls to nops. Then all "call" and
+// "invoke" instructions in the function are converted to "invoke" where
+// the "except" branch is used when returning from a longjmp call.
+void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
+{
+ BasicBlock* ABlock = Inst->getParent();
+ Function* Func = ABlock->getParent();
+
+ // Add this setjmp to the setjmp map.
+ const Type* SBPTy =
+ Type::getInt8PtrTy(Inst->getContext());
+ CastInst* BufPtr =
+ new BitCastInst(Inst->getArgOperand(0), SBPTy, "SBJmpBuf", Inst);
+ Value *Args[] = {
+ GetSetJmpMap(Func), BufPtr,
+ ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++)
+ };
+ CallInst::Create(AddSJToMap, Args, Args + 3, "", Inst);
+
+ // We are guaranteed that there are no values live across basic blocks
+ // (because we are "not in SSA form" yet), but there can still be values live
+ // in basic blocks. Because of this, splitting the setjmp block can cause
+ // values above the setjmp to not dominate uses which are after the setjmp
+ // call. For all of these occasions, we must spill the value to the stack.
+ //
+ std::set<Instruction*> InstrsAfterCall;
+
+ // The call is probably very close to the end of the basic block, for the
+ // common usage pattern of: 'if (setjmp(...))', so keep track of the
+ // instructions after the call.
+ for (BasicBlock::iterator I = ++BasicBlock::iterator(Inst), E = ABlock->end();
+ I != E; ++I)
+ InstrsAfterCall.insert(I);
+
+ for (BasicBlock::iterator II = ABlock->begin();
+ II != BasicBlock::iterator(Inst); ++II)
+ // Loop over all of the uses of instruction. If any of them are after the
+ // call, "spill" the value to the stack.
+ for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != ABlock ||
+ InstrsAfterCall.count(cast<Instruction>(U))) {
+ DemoteRegToStack(*II);
+ break;
+ }
+ }
+ InstrsAfterCall.clear();
+
+ // Change the setjmp call into a branch statement. We'll remove the
+ // setjmp call in a little bit. No worries.
+ BasicBlock* SetJmpContBlock = ABlock->splitBasicBlock(Inst);
+ assert(SetJmpContBlock && "Couldn't split setjmp BB!!");
+
+ SetJmpContBlock->setName(ABlock->getName()+"SetJmpCont");
+
+ // Add the SetJmpContBlock to the set of blocks reachable from a setjmp.
+ DFSBlocks.insert(SetJmpContBlock);
+
+ // This PHI node will be in the new block created from the
+ // splitBasicBlock call.
+ PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()),
+ "SetJmpReturn", Inst);
+
+ // Coming from a call to setjmp, the return is 0.
+ PHI->addIncoming(Constant::getNullValue(Type::getInt32Ty(Inst->getContext())),
+ ABlock);
+
+ // Add the case for this setjmp's number...
+ SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func));
+ SVP.first->addCase(ConstantInt::get(Type::getInt32Ty(Inst->getContext()),
+ SetJmpIDMap[Func] - 1),
+ SetJmpContBlock);
+
+ // Value coming from the handling of the exception.
+ PHI->addIncoming(SVP.second, SVP.second->getParent());
+
+ // Replace all uses of this instruction with the PHI node created by
+ // the eradication of setjmp.
+ Inst->replaceAllUsesWith(PHI);
+ Inst->eraseFromParent();
+
+ ++SetJmpsTransformed;
+}
+
+// visitCallInst - This converts all LLVM call instructions into invoke
+// instructions. The except part of the invoke goes to the "LongJmpBlkPre"
+// that grabs the exception and proceeds to determine if it's a longjmp
+// exception or not.
+void LowerSetJmp::visitCallInst(CallInst& CI)
+{
+ if (CI.getCalledFunction())
+ if (!IsTransformableFunction(CI.getCalledFunction()->getName()) ||
+ CI.getCalledFunction()->isIntrinsic()) return;
+
+ BasicBlock* OldBB = CI.getParent();
+
+ // If not reachable from a setjmp call, don't transform.
+ if (!DFSBlocks.count(OldBB)) return;
+
+ BasicBlock* NewBB = OldBB->splitBasicBlock(CI);
+ assert(NewBB && "Couldn't split BB of \"call\" instruction!!");
+ DFSBlocks.insert(NewBB);
+ NewBB->setName("Call2Invoke");
+
+ Function* Func = OldBB->getParent();
+
+ // Construct the new "invoke" instruction.
+ TerminatorInst* Term = OldBB->getTerminator();
+ CallSite CS(&CI);
+ std::vector<Value*> Params(CS.arg_begin(), CS.arg_end());
+ InvokeInst* II =
+ InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func],
+ Params.begin(), Params.end(), CI.getName(), Term);
+ II->setCallingConv(CI.getCallingConv());
+ II->setAttributes(CI.getAttributes());
+
+ // Replace the old call inst with the invoke inst and remove the call.
+ CI.replaceAllUsesWith(II);
+ CI.eraseFromParent();
+
+ // The old terminator is useless now that we have the invoke inst.
+ Term->eraseFromParent();
+ ++CallsTransformed;
+}
+
+// visitInvokeInst - Converting the "invoke" instruction is fairly
+// straight-forward. The old exception part is replaced by a query asking
+// if this is a longjmp exception. If it is, then it goes to the longjmp
+// exception blocks. Otherwise, control is passed the old exception.
+void LowerSetJmp::visitInvokeInst(InvokeInst& II)
+{
+ if (II.getCalledFunction())
+ if (!IsTransformableFunction(II.getCalledFunction()->getName()) ||
+ II.getCalledFunction()->isIntrinsic()) return;
+
+ BasicBlock* BB = II.getParent();
+
+ // If not reachable from a setjmp call, don't transform.
+ if (!DFSBlocks.count(BB)) return;
+
+ BasicBlock* ExceptBB = II.getUnwindDest();
+
+ Function* Func = BB->getParent();
+ BasicBlock* NewExceptBB = BasicBlock::Create(II.getContext(),
+ "InvokeExcept", Func);
+
+ // If this is a longjmp exception, then branch to the preliminary BB of
+ // the longjmp exception handling. Otherwise, go to the old exception.
+ CallInst* IsLJExcept = CallInst::Create(IsLJException, "IsLJExcept",
+ NewExceptBB);
+
+ BranchInst::Create(PrelimBBMap[Func], ExceptBB, IsLJExcept, NewExceptBB);
+
+ II.setUnwindDest(NewExceptBB);
+ ++InvokesTransformed;
+}
+
+// visitReturnInst - We want to destroy the setjmp map upon exit from the
+// function.
+void LowerSetJmp::visitReturnInst(ReturnInst &RI) {
+ Function* Func = RI.getParent()->getParent();
+ CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &RI);
+}
+
+// visitUnwindInst - We want to destroy the setjmp map upon exit from the
+// function.
+void LowerSetJmp::visitUnwindInst(UnwindInst &UI) {
+ Function* Func = UI.getParent()->getParent();
+ CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &UI);
+}
+
+ModulePass *llvm::createLowerSetJmpPass() {
+ return new LowerSetJmp();
+}
+
diff --git a/contrib/llvm/lib/Transforms/IPO/Makefile b/contrib/llvm/lib/Transforms/IPO/Makefile
new file mode 100644
index 0000000..5c42374
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/IPO/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMipo
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
new file mode 100644
index 0000000..5d838f9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -0,0 +1,652 @@
+//===- MergeFunctions.cpp - Merge identical functions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for equivalent functions that are mergable and folds them.
+//
+// A hash is computed from the function, based on its type and number of
+// basic blocks.
+//
+// Once all hashes are computed, we perform an expensive equality comparison
+// on each function pair. This takes n^2/2 comparisons per bucket, so it's
+// important that the hash function be high quality. The equality comparison
+// iterates through each instruction in each basic block.
+//
+// When a match is found the functions are folded. If both functions are
+// overridable, we move the functionality into a new internal function and
+// leave two overridable thunks to it.
+//
+//===----------------------------------------------------------------------===//
+//
+// Future work:
+//
+// * virtual functions.
+//
+// Many functions have their address taken by the virtual function table for
+// the object they belong to. However, as long as it's only used for a lookup
+// and call, this is irrelevant, and we'd like to fold such functions.
+//
+// * switch from n^2 pair-wise comparisons to an n-way comparison for each
+// bucket.
+//
+// * be smarter about bitcasts.
+//
+// In order to fold functions, we will sometimes add either bitcast instructions
+// or bitcast constant expressions. Unfortunately, this can confound further
+// analysis since the two functions differ where one has a bitcast and the
+// other doesn't. We should learn to look through bitcasts.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mergefunc"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Constants.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include <vector>
+using namespace llvm;
+
+STATISTIC(NumFunctionsMerged, "Number of functions merged");
+
+namespace {
+ /// MergeFunctions finds functions which will generate identical machine code,
+ /// by considering all pointer types to be equivalent. Once identified,
+ /// MergeFunctions will fold them by replacing a call to one to a call to a
+ /// bitcast of the other.
+ ///
+ class MergeFunctions : public ModulePass {
+ public:
+ static char ID;
+ MergeFunctions() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M);
+
+ private:
+ /// MergeTwoFunctions - Merge two equivalent functions. Upon completion, G
+ /// may be deleted, or may be converted into a thunk. In either case, it
+ /// should never be visited again.
+ void MergeTwoFunctions(Function *F, Function *G) const;
+
+ /// WriteThunk - Replace G with a simple tail call to bitcast(F). Also
+ /// replace direct uses of G with bitcast(F).
+ void WriteThunk(Function *F, Function *G) const;
+
+ TargetData *TD;
+ };
+}
+
+char MergeFunctions::ID = 0;
+INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false);
+
+ModulePass *llvm::createMergeFunctionsPass() {
+ return new MergeFunctions();
+}
+
+namespace {
+/// FunctionComparator - Compares two functions to determine whether or not
+/// they will generate machine code with the same behaviour. TargetData is
+/// used if available. The comparator always fails conservatively (erring on the
+/// side of claiming that two functions are different).
+class FunctionComparator {
+public:
+ FunctionComparator(const TargetData *TD, const Function *F1,
+ const Function *F2)
+ : F1(F1), F2(F2), TD(TD), IDMap1Count(0), IDMap2Count(0) {}
+
+ /// Compare - test whether the two functions have equivalent behaviour.
+ bool Compare();
+
+private:
+ /// Compare - test whether two basic blocks have equivalent behaviour.
+ bool Compare(const BasicBlock *BB1, const BasicBlock *BB2);
+
+ /// Enumerate - Assign or look up previously assigned numbers for the two
+ /// values, and return whether the numbers are equal. Numbers are assigned in
+ /// the order visited.
+ bool Enumerate(const Value *V1, const Value *V2);
+
+ /// isEquivalentOperation - Compare two Instructions for equivalence, similar
+ /// to Instruction::isSameOperationAs but with modifications to the type
+ /// comparison.
+ bool isEquivalentOperation(const Instruction *I1,
+ const Instruction *I2) const;
+
+ /// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic.
+ bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
+ bool isEquivalentGEP(const GetElementPtrInst *GEP1,
+ const GetElementPtrInst *GEP2) {
+ return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
+ }
+
+ /// isEquivalentType - Compare two Types, treating all pointer types as equal.
+ bool isEquivalentType(const Type *Ty1, const Type *Ty2) const;
+
+ // The two functions undergoing comparison.
+ const Function *F1, *F2;
+
+ const TargetData *TD;
+
+ typedef DenseMap<const Value *, unsigned long> IDMap;
+ IDMap Map1, Map2;
+ unsigned long IDMap1Count, IDMap2Count;
+};
+}
+
+/// isEquivalentType - any two pointers in the same address space are
+/// equivalent. Otherwise, standard type equivalence rules apply.
+bool FunctionComparator::isEquivalentType(const Type *Ty1,
+ const Type *Ty2) const {
+ if (Ty1 == Ty2)
+ return true;
+ if (Ty1->getTypeID() != Ty2->getTypeID())
+ return false;
+
+ switch(Ty1->getTypeID()) {
+ default:
+ llvm_unreachable("Unknown type!");
+ // Fall through in Release mode.
+ case Type::IntegerTyID:
+ case Type::OpaqueTyID:
+ // Ty1 == Ty2 would have returned true earlier.
+ return false;
+
+ case Type::VoidTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ case Type::MetadataTyID:
+ return true;
+
+ case Type::PointerTyID: {
+ const PointerType *PTy1 = cast<PointerType>(Ty1);
+ const PointerType *PTy2 = cast<PointerType>(Ty2);
+ return PTy1->getAddressSpace() == PTy2->getAddressSpace();
+ }
+
+ case Type::StructTyID: {
+ const StructType *STy1 = cast<StructType>(Ty1);
+ const StructType *STy2 = cast<StructType>(Ty2);
+ if (STy1->getNumElements() != STy2->getNumElements())
+ return false;
+
+ if (STy1->isPacked() != STy2->isPacked())
+ return false;
+
+ for (unsigned i = 0, e = STy1->getNumElements(); i != e; ++i) {
+ if (!isEquivalentType(STy1->getElementType(i), STy2->getElementType(i)))
+ return false;
+ }
+ return true;
+ }
+
+ case Type::FunctionTyID: {
+ const FunctionType *FTy1 = cast<FunctionType>(Ty1);
+ const FunctionType *FTy2 = cast<FunctionType>(Ty2);
+ if (FTy1->getNumParams() != FTy2->getNumParams() ||
+ FTy1->isVarArg() != FTy2->isVarArg())
+ return false;
+
+ if (!isEquivalentType(FTy1->getReturnType(), FTy2->getReturnType()))
+ return false;
+
+ for (unsigned i = 0, e = FTy1->getNumParams(); i != e; ++i) {
+ if (!isEquivalentType(FTy1->getParamType(i), FTy2->getParamType(i)))
+ return false;
+ }
+ return true;
+ }
+
+ case Type::ArrayTyID: {
+ const ArrayType *ATy1 = cast<ArrayType>(Ty1);
+ const ArrayType *ATy2 = cast<ArrayType>(Ty2);
+ return ATy1->getNumElements() == ATy2->getNumElements() &&
+ isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
+ }
+
+ case Type::VectorTyID: {
+ const VectorType *VTy1 = cast<VectorType>(Ty1);
+ const VectorType *VTy2 = cast<VectorType>(Ty2);
+ return VTy1->getNumElements() == VTy2->getNumElements() &&
+ isEquivalentType(VTy1->getElementType(), VTy2->getElementType());
+ }
+ }
+}
+
+/// isEquivalentOperation - determine whether the two operations are the same
+/// except that pointer-to-A and pointer-to-B are equivalent. This should be
+/// kept in sync with Instruction::isSameOperationAs.
+bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
+ const Instruction *I2) const {
+ if (I1->getOpcode() != I2->getOpcode() ||
+ I1->getNumOperands() != I2->getNumOperands() ||
+ !isEquivalentType(I1->getType(), I2->getType()) ||
+ !I1->hasSameSubclassOptionalData(I2))
+ return false;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same type
+ for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i)
+ if (!isEquivalentType(I1->getOperand(i)->getType(),
+ I2->getOperand(i)->getType()))
+ return false;
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I1))
+ return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() &&
+ LI->getAlignment() == cast<LoadInst>(I2)->getAlignment();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
+ return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
+ SI->getAlignment() == cast<StoreInst>(I2)->getAlignment();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
+ return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(I1))
+ return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() &&
+ CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
+ CI->getAttributes().getRawPointer() ==
+ cast<CallInst>(I2)->getAttributes().getRawPointer();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
+ return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
+ CI->getAttributes().getRawPointer() ==
+ cast<InvokeInst>(I2)->getAttributes().getRawPointer();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) {
+ if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices())
+ return false;
+ for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i)
+ if (IVI->idx_begin()[i] != cast<InsertValueInst>(I2)->idx_begin()[i])
+ return false;
+ return true;
+ }
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) {
+ if (EVI->getNumIndices() != cast<ExtractValueInst>(I2)->getNumIndices())
+ return false;
+ for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i)
+ if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I2)->idx_begin()[i])
+ return false;
+ return true;
+ }
+
+ return true;
+}
+
+/// isEquivalentGEP - determine whether two GEP operations perform the same
+/// underlying arithmetic.
+bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
+ const GEPOperator *GEP2) {
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
+ if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) {
+ SmallVector<Value *, 8> Indices1(GEP1->idx_begin(), GEP1->idx_end());
+ SmallVector<Value *, 8> Indices2(GEP2->idx_begin(), GEP2->idx_end());
+ uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(),
+ Indices1.data(), Indices1.size());
+ uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(),
+ Indices2.data(), Indices2.size());
+ return Offset1 == Offset2;
+ }
+
+ if (GEP1->getPointerOperand()->getType() !=
+ GEP2->getPointerOperand()->getType())
+ return false;
+
+ if (GEP1->getNumOperands() != GEP2->getNumOperands())
+ return false;
+
+ for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
+ if (!Enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
+ return false;
+ }
+
+ return true;
+}
+
+/// Enumerate - Compare two values used by the two functions under pair-wise
+/// comparison. If this is the first time the values are seen, they're added to
+/// the mapping so that we will detect mismatches on next use.
+bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) {
+ // Check for function @f1 referring to itself and function @f2 referring to
+ // itself, or referring to each other, or both referring to either of them.
+ // They're all equivalent if the two functions are otherwise equivalent.
+ if (V1 == F1 && V2 == F2)
+ return true;
+ if (V1 == F2 && V2 == F1)
+ return true;
+
+ // TODO: constant expressions with GEP or references to F1 or F2.
+ if (isa<Constant>(V1))
+ return V1 == V2;
+
+ if (isa<InlineAsm>(V1) && isa<InlineAsm>(V2)) {
+ const InlineAsm *IA1 = cast<InlineAsm>(V1);
+ const InlineAsm *IA2 = cast<InlineAsm>(V2);
+ return IA1->getAsmString() == IA2->getAsmString() &&
+ IA1->getConstraintString() == IA2->getConstraintString();
+ }
+
+ unsigned long &ID1 = Map1[V1];
+ if (!ID1)
+ ID1 = ++IDMap1Count;
+
+ unsigned long &ID2 = Map2[V2];
+ if (!ID2)
+ ID2 = ++IDMap2Count;
+
+ return ID1 == ID2;
+}
+
+/// Compare - test whether two basic blocks have equivalent behaviour.
+bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
+ BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
+ BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();
+
+ do {
+ if (!Enumerate(F1I, F2I))
+ return false;
+
+ if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
+ const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I);
+ if (!GEP2)
+ return false;
+
+ if (!Enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
+ return false;
+
+ if (!isEquivalentGEP(GEP1, GEP2))
+ return false;
+ } else {
+ if (!isEquivalentOperation(F1I, F2I))
+ return false;
+
+ assert(F1I->getNumOperands() == F2I->getNumOperands());
+ for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) {
+ Value *OpF1 = F1I->getOperand(i);
+ Value *OpF2 = F2I->getOperand(i);
+
+ if (!Enumerate(OpF1, OpF2))
+ return false;
+
+ if (OpF1->getValueID() != OpF2->getValueID() ||
+ !isEquivalentType(OpF1->getType(), OpF2->getType()))
+ return false;
+ }
+ }
+
+ ++F1I, ++F2I;
+ } while (F1I != F1E && F2I != F2E);
+
+ return F1I == F1E && F2I == F2E;
+}
+
+/// Compare - test whether the two functions have equivalent behaviour.
+bool FunctionComparator::Compare() {
+ // We need to recheck everything, but check the things that weren't included
+ // in the hash first.
+
+ if (F1->getAttributes() != F2->getAttributes())
+ return false;
+
+ if (F1->hasGC() != F2->hasGC())
+ return false;
+
+ if (F1->hasGC() && F1->getGC() != F2->getGC())
+ return false;
+
+ if (F1->hasSection() != F2->hasSection())
+ return false;
+
+ if (F1->hasSection() && F1->getSection() != F2->getSection())
+ return false;
+
+ if (F1->isVarArg() != F2->isVarArg())
+ return false;
+
+ // TODO: if it's internal and only used in direct calls, we could handle this
+ // case too.
+ if (F1->getCallingConv() != F2->getCallingConv())
+ return false;
+
+ if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType()))
+ return false;
+
+ assert(F1->arg_size() == F2->arg_size() &&
+ "Identical functions have a different number of args.");
+
+ // Visit the arguments so that they get enumerated in the order they're
+ // passed in.
+ for (Function::const_arg_iterator f1i = F1->arg_begin(),
+ f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
+ if (!Enumerate(f1i, f2i))
+ llvm_unreachable("Arguments repeat");
+ }
+
+ // We do a CFG-ordered walk since the actual ordering of the blocks in the
+ // linked list is immaterial. Our walk starts at the entry block for both
+ // functions, then takes each block from each terminator in order. As an
+ // artifact, this also means that unreachable blocks are ignored.
+ SmallVector<const BasicBlock *, 8> F1BBs, F2BBs;
+ SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.
+
+ F1BBs.push_back(&F1->getEntryBlock());
+ F2BBs.push_back(&F2->getEntryBlock());
+
+ VisitedBBs.insert(F1BBs[0]);
+ while (!F1BBs.empty()) {
+ const BasicBlock *F1BB = F1BBs.pop_back_val();
+ const BasicBlock *F2BB = F2BBs.pop_back_val();
+
+ if (!Enumerate(F1BB, F2BB) || !Compare(F1BB, F2BB))
+ return false;
+
+ const TerminatorInst *F1TI = F1BB->getTerminator();
+ const TerminatorInst *F2TI = F2BB->getTerminator();
+
+ assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors());
+ for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(F1TI->getSuccessor(i)))
+ continue;
+
+ F1BBs.push_back(F1TI->getSuccessor(i));
+ F2BBs.push_back(F2TI->getSuccessor(i));
+ }
+ }
+ return true;
+}
+
+/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace
+/// direct uses of G with bitcast(F).
+void MergeFunctions::WriteThunk(Function *F, Function *G) const {
+ if (!G->mayBeOverridden()) {
+ // Redirect direct callers of G to F.
+ Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+ for (Value::use_iterator UI = G->use_begin(), UE = G->use_end();
+ UI != UE;) {
+ Value::use_iterator TheIter = UI;
+ ++UI;
+ CallSite CS(*TheIter);
+ if (CS && CS.isCallee(TheIter))
+ TheIter.getUse().set(BitcastF);
+ }
+ }
+
+ // If G was internal then we may have replaced all uses if G with F. If so,
+ // stop here and delete G. There's no need for a thunk.
+ if (G->hasLocalLinkage() && G->use_empty()) {
+ G->eraseFromParent();
+ return;
+ }
+
+ Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
+ G->getParent());
+ BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
+ IRBuilder<false> Builder(BB);
+
+ SmallVector<Value *, 16> Args;
+ unsigned i = 0;
+ const FunctionType *FFTy = F->getFunctionType();
+ for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
+ AI != AE; ++AI) {
+ Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i)));
+ ++i;
+ }
+
+ CallInst *CI = Builder.CreateCall(F, Args.begin(), Args.end());
+ CI->setTailCall();
+ CI->setCallingConv(F->getCallingConv());
+ if (NewG->getReturnType()->isVoidTy()) {
+ Builder.CreateRetVoid();
+ } else {
+ Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType()));
+ }
+
+ NewG->copyAttributesFrom(G);
+ NewG->takeName(G);
+ G->replaceAllUsesWith(NewG);
+ G->eraseFromParent();
+}
+
+/// MergeTwoFunctions - Merge two equivalent functions. Upon completion,
+/// Function G is deleted.
+void MergeFunctions::MergeTwoFunctions(Function *F, Function *G) const {
+ if (F->isWeakForLinker()) {
+ assert(G->isWeakForLinker());
+
+ // Make them both thunks to the same internal function.
+ Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+ F->getParent());
+ H->copyAttributesFrom(F);
+ H->takeName(F);
+ F->replaceAllUsesWith(H);
+
+ unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
+
+ WriteThunk(F, G);
+ WriteThunk(F, H);
+
+ F->setAlignment(MaxAlignment);
+ F->setLinkage(GlobalValue::InternalLinkage);
+ } else {
+ WriteThunk(F, G);
+ }
+
+ ++NumFunctionsMerged;
+}
+
+static unsigned ProfileFunction(const Function *F) {
+ const FunctionType *FTy = F->getFunctionType();
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(F->size());
+ ID.AddInteger(F->getCallingConv());
+ ID.AddBoolean(F->hasGC());
+ ID.AddBoolean(FTy->isVarArg());
+ ID.AddInteger(FTy->getReturnType()->getTypeID());
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ ID.AddInteger(FTy->getParamType(i)->getTypeID());
+ return ID.ComputeHash();
+}
+
+class ComparableFunction {
+public:
+ ComparableFunction(Function *Func, TargetData *TD)
+ : Func(Func), Hash(ProfileFunction(Func)), TD(TD) {}
+
+ AssertingVH<Function> const Func;
+ const unsigned Hash;
+ TargetData * const TD;
+};
+
+struct MergeFunctionsEqualityInfo {
+ static ComparableFunction *getEmptyKey() {
+ return reinterpret_cast<ComparableFunction*>(0);
+ }
+ static ComparableFunction *getTombstoneKey() {
+ return reinterpret_cast<ComparableFunction*>(-1);
+ }
+ static unsigned getHashValue(const ComparableFunction *CF) {
+ return CF->Hash;
+ }
+ static bool isEqual(const ComparableFunction *LHS,
+ const ComparableFunction *RHS) {
+ if (LHS == RHS)
+ return true;
+ if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+ RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ assert(LHS->TD == RHS->TD && "Comparing functions for different targets");
+ return FunctionComparator(LHS->TD, LHS->Func, RHS->Func).Compare();
+ }
+};
+
+bool MergeFunctions::runOnModule(Module &M) {
+ typedef DenseSet<ComparableFunction *, MergeFunctionsEqualityInfo> FnSetType;
+
+ bool Changed = false;
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ std::vector<Function *> Funcs;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage())
+ Funcs.push_back(F);
+ }
+
+ bool LocalChanged;
+ do {
+ LocalChanged = false;
+
+ FnSetType FnSet;
+ for (unsigned i = 0, e = Funcs.size(); i != e;) {
+ Function *F = Funcs[i];
+ ComparableFunction *NewF = new ComparableFunction(F, TD);
+ std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
+ if (!Result.second) {
+ ComparableFunction *&OldF = *Result.first;
+ assert(OldF && "Expected a hash collision");
+
+ // NewF will be deleted in favour of OldF unless NewF is strong and
+ // OldF is weak in which case swap them to keep the strong definition.
+
+ if (OldF->Func->isWeakForLinker() && !NewF->Func->isWeakForLinker())
+ std::swap(OldF, NewF);
+
+ DEBUG(dbgs() << " " << OldF->Func->getName() << " == "
+ << NewF->Func->getName() << '\n');
+
+ Funcs.erase(Funcs.begin() + i);
+ --e;
+
+ Function *DeleteF = NewF->Func;
+ delete NewF;
+ MergeTwoFunctions(OldF->Func, DeleteF);
+ LocalChanged = true;
+ Changed = true;
+ } else {
+ ++i;
+ }
+ }
+ DeleteContainerPointers(FnSet);
+ } while (LocalChanged);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
new file mode 100644
index 0000000..432f7c5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -0,0 +1,180 @@
+//===- PartialInlining.cpp - Inline parts of functions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs partial inlining, typically by inlining an if statement
+// that surrounds the body of the function.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "partialinlining"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+using namespace llvm;
+
+STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+
+namespace {
+ struct PartialInliner : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+ static char ID; // Pass identification, replacement for typeid
+ PartialInliner() : ModulePass(ID) {}
+
+ bool runOnModule(Module& M);
+
+ private:
+ Function* unswitchFunction(Function* F);
+ };
+}
+
+char PartialInliner::ID = 0;
+INITIALIZE_PASS(PartialInliner, "partial-inliner",
+ "Partial Inliner", false, false);
+
+ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
+
+Function* PartialInliner::unswitchFunction(Function* F) {
+ // First, verify that this function is an unswitching candidate...
+ BasicBlock* entryBlock = F->begin();
+ BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
+ if (!BR || BR->isUnconditional())
+ return 0;
+
+ BasicBlock* returnBlock = 0;
+ BasicBlock* nonReturnBlock = 0;
+ unsigned returnCount = 0;
+ for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
+ SI != SE; ++SI)
+ if (isa<ReturnInst>((*SI)->getTerminator())) {
+ returnBlock = *SI;
+ returnCount++;
+ } else
+ nonReturnBlock = *SI;
+
+ if (returnCount != 1)
+ return 0;
+
+ // Clone the function, so that we can hack away on it.
+ ValueMap<const Value*, Value*> VMap;
+ Function* duplicateFunction = CloneFunction(F, VMap,
+ /*ModuleLevelChanges=*/false);
+ duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
+ F->getParent()->getFunctionList().push_back(duplicateFunction);
+ BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
+ BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
+ BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);
+
+ // Go ahead and update all uses to the duplicate, so that we can just
+ // use the inliner functionality when we're done hacking.
+ F->replaceAllUsesWith(duplicateFunction);
+
+ // Special hackery is needed with PHI nodes that have inputs from more than
+ // one extracted block. For simplicity, just split the PHIs into a two-level
+ // sequence of PHIs, some of which will go in the extracted region, and some
+ // of which will go outside.
+ BasicBlock* preReturn = newReturnBlock;
+ newReturnBlock = newReturnBlock->splitBasicBlock(
+ newReturnBlock->getFirstNonPHI());
+ BasicBlock::iterator I = preReturn->begin();
+ BasicBlock::iterator Ins = newReturnBlock->begin();
+ while (I != preReturn->end()) {
+ PHINode* OldPhi = dyn_cast<PHINode>(I);
+ if (!OldPhi) break;
+
+ PHINode* retPhi = PHINode::Create(OldPhi->getType(), "", Ins);
+ OldPhi->replaceAllUsesWith(retPhi);
+ Ins = newReturnBlock->getFirstNonPHI();
+
+ retPhi->addIncoming(I, preReturn);
+ retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
+ newEntryBlock);
+ OldPhi->removeIncomingValue(newEntryBlock);
+
+ ++I;
+ }
+ newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock);
+
+ // Gather up the blocks that we're going to extract.
+ std::vector<BasicBlock*> toExtract;
+ toExtract.push_back(newNonReturnBlock);
+ for (Function::iterator FI = duplicateFunction->begin(),
+ FE = duplicateFunction->end(); FI != FE; ++FI)
+ if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
+ &*FI != newNonReturnBlock)
+ toExtract.push_back(FI);
+
+ // The CodeExtractor needs a dominator tree.
+ DominatorTree DT;
+ DT.runOnFunction(*duplicateFunction);
+
+ // Extract the body of the if.
+ Function* extractedFunction = ExtractCodeRegion(DT, toExtract);
+
+ InlineFunctionInfo IFI;
+
+ // Inline the top-level if test into all callers.
+ std::vector<User*> Users(duplicateFunction->use_begin(),
+ duplicateFunction->use_end());
+ for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
+ UI != UE; ++UI)
+ if (CallInst *CI = dyn_cast<CallInst>(*UI))
+ InlineFunction(CI, IFI);
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI))
+ InlineFunction(II, IFI);
+
+ // Ditch the duplicate, since we're done with it, and rewrite all remaining
+ // users (function pointers, etc.) back to the original function.
+ duplicateFunction->replaceAllUsesWith(F);
+ duplicateFunction->eraseFromParent();
+
+ ++NumPartialInlined;
+
+ return extractedFunction;
+}
+
+bool PartialInliner::runOnModule(Module& M) {
+ std::vector<Function*> worklist;
+ worklist.reserve(M.size());
+ for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
+ if (!FI->use_empty() && !FI->isDeclaration())
+ worklist.push_back(&*FI);
+
+ bool changed = false;
+ while (!worklist.empty()) {
+ Function* currFunc = worklist.back();
+ worklist.pop_back();
+
+ if (currFunc->use_empty()) continue;
+
+ bool recursive = false;
+ for (Function::use_iterator UI = currFunc->use_begin(),
+ UE = currFunc->use_end(); UI != UE; ++UI)
+ if (Instruction* I = dyn_cast<Instruction>(*UI))
+ if (I->getParent()->getParent() == currFunc) {
+ recursive = true;
+ break;
+ }
+ if (recursive) continue;
+
+
+ if (Function* newFunc = unswitchFunction(currFunc)) {
+ worklist.push_back(newFunc);
+ changed = true;
+ }
+
+ }
+
+ return changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp b/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp
new file mode 100644
index 0000000..4a99a41
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp
@@ -0,0 +1,216 @@
+//===-- PartialSpecialization.cpp - Specialize for common constants--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass finds function arguments that are often a common constant and
+// specializes a version of the called function for that constant.
+//
+// This pass simply does the cloning for functions it specializes. It depends
+// on IPSCCP and DAE to clean up the results.
+//
+// The initial heuristic favors constant arguments that are used in control
+// flow.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "partialspecialization"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constant.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/ADT/DenseSet.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(numSpecialized, "Number of specialized functions created");
+STATISTIC(numReplaced, "Number of callers replaced by specialization");
+
+// Maximum number of arguments markable interested
+static const int MaxInterests = 6;
+
+// Call must be used at least occasionally
+static const int CallsMin = 5;
+
+// Must have 10% of calls having the same constant to specialize on
+static const double ConstValPercent = .1;
+
+namespace {
+ typedef SmallVector<int, MaxInterests> InterestingArgVector;
+ class PartSpec : public ModulePass {
+ void scanForInterest(Function&, InterestingArgVector&);
+ int scanDistribution(Function&, int, std::map<Constant*, int>&);
+ public :
+ static char ID; // Pass identification, replacement for typeid
+ PartSpec() : ModulePass(ID) {}
+ bool runOnModule(Module &M);
+ };
+}
+
+char PartSpec::ID = 0;
+INITIALIZE_PASS(PartSpec, "partialspecialization",
+ "Partial Specialization", false, false);
+
+// Specialize F by replacing the arguments (keys) in replacements with the
+// constants (values). Replace all calls to F with those constants with
+// a call to the specialized function. Returns the specialized function
+static Function*
+SpecializeFunction(Function* F,
+ ValueMap<const Value*, Value*>& replacements) {
+ // arg numbers of deleted arguments
+ DenseMap<unsigned, const Argument*> deleted;
+ for (ValueMap<const Value*, Value*>::iterator
+ repb = replacements.begin(), repe = replacements.end();
+ repb != repe; ++repb) {
+ Argument const *arg = cast<const Argument>(repb->first);
+ deleted[arg->getArgNo()] = arg;
+ }
+
+ Function* NF = CloneFunction(F, replacements,
+ /*ModuleLevelChanges=*/false);
+ NF->setLinkage(GlobalValue::InternalLinkage);
+ F->getParent()->getFunctionList().push_back(NF);
+
+ for (Value::use_iterator ii = F->use_begin(), ee = F->use_end();
+ ii != ee; ) {
+ Value::use_iterator i = ii;
+ ++ii;
+ User *U = *i;
+ CallSite CS(U);
+ if (CS) {
+ if (CS.getCalledFunction() == F) {
+ SmallVector<Value*, 6> args;
+ // Assemble the non-specialized arguments for the updated callsite.
+ // In the process, make sure that the specialized arguments are
+ // constant and match the specialization. If that's not the case,
+ // this callsite needs to call the original or some other
+ // specialization; don't change it here.
+ CallSite::arg_iterator as = CS.arg_begin(), ae = CS.arg_end();
+ for (CallSite::arg_iterator ai = as; ai != ae; ++ai) {
+ DenseMap<unsigned, const Argument*>::iterator delit = deleted.find(
+ std::distance(as, ai));
+ if (delit == deleted.end())
+ args.push_back(cast<Value>(ai));
+ else {
+ Constant *ci = dyn_cast<Constant>(ai);
+ if (!(ci && ci == replacements[delit->second]))
+ goto next_use;
+ }
+ }
+ Value* NCall;
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ NCall = CallInst::Create(NF, args.begin(), args.end(),
+ CI->getName(), CI);
+ cast<CallInst>(NCall)->setTailCall(CI->isTailCall());
+ cast<CallInst>(NCall)->setCallingConv(CI->getCallingConv());
+ } else {
+ InvokeInst *II = cast<InvokeInst>(U);
+ NCall = InvokeInst::Create(NF, II->getNormalDest(),
+ II->getUnwindDest(),
+ args.begin(), args.end(),
+ II->getName(), II);
+ cast<InvokeInst>(NCall)->setCallingConv(II->getCallingConv());
+ }
+ CS.getInstruction()->replaceAllUsesWith(NCall);
+ CS.getInstruction()->eraseFromParent();
+ ++numReplaced;
+ }
+ }
+ next_use:;
+ }
+ return NF;
+}
+
+
+bool PartSpec::runOnModule(Module &M) {
+ bool Changed = false;
+ for (Module::iterator I = M.begin(); I != M.end(); ++I) {
+ Function &F = *I;
+ if (F.isDeclaration() || F.mayBeOverridden()) continue;
+ InterestingArgVector interestingArgs;
+ scanForInterest(F, interestingArgs);
+
+ // Find the first interesting Argument that we can specialize on
+ // If there are multiple interesting Arguments, then those will be found
+ // when processing the cloned function.
+ bool breakOuter = false;
+ for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) {
+ std::map<Constant*, int> distribution;
+ int total = scanDistribution(F, interestingArgs[x], distribution);
+ if (total > CallsMin)
+ for (std::map<Constant*, int>::iterator ii = distribution.begin(),
+ ee = distribution.end(); ii != ee; ++ii)
+ if (total > ii->second && ii->first &&
+ ii->second > total * ConstValPercent) {
+ ValueMap<const Value*, Value*> m;
+ Function::arg_iterator arg = F.arg_begin();
+ for (int y = 0; y < interestingArgs[x]; ++y)
+ ++arg;
+ m[&*arg] = ii->first;
+ SpecializeFunction(&F, m);
+ ++numSpecialized;
+ breakOuter = true;
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+/// scanForInterest - This function decides which arguments would be worth
+/// specializing on.
+void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) {
+ for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end();
+ ii != ee; ++ii) {
+ for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end();
+ ui != ue; ++ui) {
+
+ bool interesting = false;
+ User *U = *ui;
+ if (isa<CmpInst>(U)) interesting = true;
+ else if (isa<CallInst>(U))
+ interesting = ui->getOperand(0) == ii;
+ else if (isa<InvokeInst>(U))
+ interesting = ui->getOperand(0) == ii;
+ else if (isa<SwitchInst>(U)) interesting = true;
+ else if (isa<BranchInst>(U)) interesting = true;
+
+ if (interesting) {
+ args.push_back(std::distance(F.arg_begin(), ii));
+ break;
+ }
+ }
+ }
+}
+
+/// scanDistribution - Construct a histogram of constants for arg of F at arg.
+int PartSpec::scanDistribution(Function& F, int arg,
+ std::map<Constant*, int>& dist) {
+ bool hasIndirect = false;
+ int total = 0;
+ for (Value::use_iterator ii = F.use_begin(), ee = F.use_end();
+ ii != ee; ++ii) {
+ User *U = *ii;
+ CallSite CS(U);
+ if (CS && CS.getCalledFunction() == &F) {
+ ++dist[dyn_cast<Constant>(CS.getArgument(arg))];
+ ++total;
+ } else
+ hasIndirect = true;
+ }
+
+ // Preserve the original address taken function even if all other uses
+ // will be specialized.
+ if (hasIndirect) ++total;
+ return total;
+}
+
+ModulePass* llvm::createPartialSpecializationPass() { return new PartSpec(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
new file mode 100644
index 0000000..09ac76f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -0,0 +1,252 @@
+//===- PruneEH.cpp - Pass which deletes unused exception handlers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interprocedural pass which walks the
+// call-graph, turning invoke instructions into calls, iff the callee cannot
+// throw an exception, and marking functions 'nounwind' if they cannot throw.
+// It implements this as a bottom-up traversal of the call-graph.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "prune-eh"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+#include <set>
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumRemoved, "Number of invokes removed");
+STATISTIC(NumUnreach, "Number of noreturn calls optimized");
+
+namespace {
+ struct PruneEH : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ PruneEH() : CallGraphSCCPass(ID) {}
+
+ // runOnSCC - Analyze the SCC, performing the transformation if possible.
+ bool runOnSCC(CallGraphSCC &SCC);
+
+ bool SimplifyFunction(Function *F);
+ void DeleteBasicBlock(BasicBlock *BB);
+ };
+}
+
+char PruneEH::ID = 0;
+INITIALIZE_PASS(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false);
+
+Pass *llvm::createPruneEHPass() { return new PruneEH(); }
+
+
+bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
+ SmallPtrSet<CallGraphNode *, 8> SCCNodes;
+ CallGraph &CG = getAnalysis<CallGraph>();
+ bool MadeChange = false;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert(*I);
+
+ // First pass, scan all of the functions in the SCC, simplifying them
+ // according to what we know.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ if (Function *F = (*I)->getFunction())
+ MadeChange |= SimplifyFunction(F);
+
+ // Next, check to see if any callees might throw or if there are any external
+ // functions in this SCC: if so, we cannot prune any functions in this SCC.
+ // Definitions that are weak and not declared non-throwing might be
+ // overridden at linktime with something that throws, so assume that.
+ // If this SCC includes the unwind instruction, we KNOW it throws, so
+ // obviously the SCC might throw.
+ //
+ bool SCCMightUnwind = false, SCCMightReturn = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
+ (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F == 0) {
+ SCCMightUnwind = true;
+ SCCMightReturn = true;
+ } else if (F->isDeclaration() || F->mayBeOverridden()) {
+ SCCMightUnwind |= !F->doesNotThrow();
+ SCCMightReturn |= !F->doesNotReturn();
+ } else {
+ bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow();
+ bool CheckReturn = !SCCMightReturn && !F->doesNotReturn();
+
+ if (!CheckUnwind && !CheckReturn)
+ continue;
+
+ // Check to see if this function performs an unwind or calls an
+ // unwinding function.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (CheckUnwind && isa<UnwindInst>(BB->getTerminator())) {
+ // Uses unwind!
+ SCCMightUnwind = true;
+ } else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) {
+ SCCMightReturn = true;
+ }
+
+ // Invoke instructions don't allow unwinding to continue, so we are
+ // only interested in call instructions.
+ if (CheckUnwind && !SCCMightUnwind)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->doesNotThrow()) {
+ // This call cannot throw.
+ } else if (Function *Callee = CI->getCalledFunction()) {
+ CallGraphNode *CalleeNode = CG[Callee];
+ // If the callee is outside our current SCC then we may
+ // throw because it might.
+ if (!SCCNodes.count(CalleeNode)) {
+ SCCMightUnwind = true;
+ break;
+ }
+ } else {
+ // Indirect call, it might throw.
+ SCCMightUnwind = true;
+ break;
+ }
+ }
+ if (SCCMightUnwind && SCCMightReturn) break;
+ }
+ }
+ }
+
+ // If the SCC doesn't unwind or doesn't throw, note this fact.
+ if (!SCCMightUnwind || !SCCMightReturn)
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Attributes NewAttributes = Attribute::None;
+
+ if (!SCCMightUnwind)
+ NewAttributes |= Attribute::NoUnwind;
+ if (!SCCMightReturn)
+ NewAttributes |= Attribute::NoReturn;
+
+ Function *F = (*I)->getFunction();
+ const AttrListPtr &PAL = F->getAttributes();
+ const AttrListPtr &NPAL = PAL.addAttr(~0, NewAttributes);
+ if (PAL != NPAL) {
+ MadeChange = true;
+ F->setAttributes(NPAL);
+ }
+ }
+
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ // Convert any invoke instructions to non-throwing functions in this node
+ // into call instructions with a branch. This makes the exception blocks
+ // dead.
+ if (Function *F = (*I)->getFunction())
+ MadeChange |= SimplifyFunction(F);
+ }
+
+ return MadeChange;
+}
+
+
+// SimplifyFunction - Given information about callees, simplify the specified
+// function if we have invokes to non-unwinding functions or code after calls to
+// no-return functions.
+bool PruneEH::SimplifyFunction(Function *F) {
+ bool MadeChange = false;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ if (II->doesNotThrow()) {
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ // Insert a call instruction before the invoke.
+ CallInst *Call = CallInst::Create(II->getCalledValue(),
+ Args.begin(), Args.end(), "", II);
+ Call->takeName(II);
+ Call->setCallingConv(II->getCallingConv());
+ Call->setAttributes(II->getAttributes());
+
+ // Anything that used the value produced by the invoke instruction
+ // now uses the value produced by the call instruction. Note that we
+ // do this even for void functions and calls with no uses so that the
+ // callgraph edge is updated.
+ II->replaceAllUsesWith(Call);
+ BasicBlock *UnwindBlock = II->getUnwindDest();
+ UnwindBlock->removePredecessor(II->getParent());
+
+ // Insert a branch to the normal destination right before the
+ // invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Finally, delete the invoke instruction!
+ BB->getInstList().pop_back();
+
+ // If the unwind block is now dead, nuke it.
+ if (pred_begin(UnwindBlock) == pred_end(UnwindBlock))
+ DeleteBasicBlock(UnwindBlock); // Delete the new BB.
+
+ ++NumRemoved;
+ MadeChange = true;
+ }
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
+ if (CallInst *CI = dyn_cast<CallInst>(I++))
+ if (CI->doesNotReturn() && !isa<UnreachableInst>(I)) {
+ // This call calls a function that cannot return. Insert an
+ // unreachable instruction after it and simplify the code. Do this
+ // by splitting the BB, adding the unreachable, then deleting the
+ // new BB.
+ BasicBlock *New = BB->splitBasicBlock(I);
+
+ // Remove the uncond branch and add an unreachable.
+ BB->getInstList().pop_back();
+ new UnreachableInst(BB->getContext(), BB);
+
+ DeleteBasicBlock(New); // Delete the new BB.
+ MadeChange = true;
+ ++NumUnreach;
+ break;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// DeleteBasicBlock - remove the specified basic block from the program,
+/// updating the callgraph to reflect any now-obsolete edges due to calls that
+/// exist in the BB.
+void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
+ assert(pred_begin(BB) == pred_end(BB) && "BB is not dead!");
+ CallGraph &CG = getAnalysis<CallGraph>();
+
+ CallGraphNode *CGN = CG[BB->getParent()];
+ for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) {
+ --I;
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!isa<DbgInfoIntrinsic>(I))
+ CGN->removeCallEdgeFor(CI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ CGN->removeCallEdgeFor(II);
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ }
+
+ // Get the list of successors of this block.
+ std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB));
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ Succs[i]->removePredecessor(BB);
+
+ BB->eraseFromParent();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
new file mode 100644
index 0000000..ee10ad0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -0,0 +1,71 @@
+//===-- StripDeadPrototypes.cpp - Remove unused function declarations ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions in the input module, looking for
+// dead declarations and removes them. Dead declarations are declarations of
+// functions for which no implementation is available (i.e., declarations for
+// unused library functions).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strip-dead-prototypes"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
+
+namespace {
+
+/// @brief Pass to remove unused function declarations.
+class StripDeadPrototypesPass : public ModulePass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ StripDeadPrototypesPass() : ModulePass(ID) { }
+ virtual bool runOnModule(Module &M);
+};
+
+} // end anonymous namespace
+
+char StripDeadPrototypesPass::ID = 0;
+INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
+ "Strip Unused Function Prototypes", false, false);
+
+bool StripDeadPrototypesPass::runOnModule(Module &M) {
+ bool MadeChange = false;
+
+ // Erase dead function prototypes.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+ Function *F = I++;
+ // Function must be a prototype and unused.
+ if (F->isDeclaration() && F->use_empty()) {
+ F->eraseFromParent();
+ ++NumDeadPrototypes;
+ MadeChange = true;
+ }
+ }
+
+ // Erase dead global var prototypes.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ) {
+ GlobalVariable *GV = I++;
+ // Global must be a prototype and unused.
+ if (GV->isDeclaration() && GV->use_empty())
+ GV->eraseFromParent();
+ }
+
+ // Return an indication of whether we changed anything or not.
+ return MadeChange;
+}
+
+ModulePass *llvm::createStripDeadPrototypesPass() {
+ return new StripDeadPrototypesPass();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
new file mode 100644
index 0000000..20b7b8f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -0,0 +1,400 @@
+//===- StripSymbols.cpp - Strip symbols and debug info from a module ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The StripSymbols transformation implements code stripping. Specifically, it
+// can delete:
+//
+// * names for virtual registers
+// * symbols for internal globals and functions
+// * debug information
+//
+// Note that this transformation makes code much less readable, so it should
+// only be used in situations where the 'strip' utility would be used, such as
+// reducing code size or making it harder to reverse engineer code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+namespace {
+ class StripSymbols : public ModulePass {
+ bool OnlyDebugInfo;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripSymbols(bool ODI = false)
+ : ModulePass(ID), OnlyDebugInfo(ODI) {}
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripNonDebugSymbols : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripNonDebugSymbols()
+ : ModulePass(ID) {}
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripDebugDeclare : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripDebugDeclare()
+ : ModulePass(ID) {}
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripDeadDebugInfo : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripDeadDebugInfo()
+ : ModulePass(ID) {}
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char StripSymbols::ID = 0;
+INITIALIZE_PASS(StripSymbols, "strip",
+ "Strip all symbols from a module", false, false);
+
+ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
+ return new StripSymbols(OnlyDebugInfo);
+}
+
+char StripNonDebugSymbols::ID = 0;
+INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
+ "Strip all symbols, except dbg symbols, from a module",
+ false, false);
+
+ModulePass *llvm::createStripNonDebugSymbolsPass() {
+ return new StripNonDebugSymbols();
+}
+
+char StripDebugDeclare::ID = 0;
+INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
+ "Strip all llvm.dbg.declare intrinsics", false, false);
+
+ModulePass *llvm::createStripDebugDeclarePass() {
+ return new StripDebugDeclare();
+}
+
+char StripDeadDebugInfo::ID = 0;
+INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
+ "Strip debug info for unused symbols", false, false);
+
+ModulePass *llvm::createStripDeadDebugInfoPass() {
+ return new StripDeadDebugInfo();
+}
+
+/// OnlyUsedBy - Return true if V is only used by Usr.
+static bool OnlyUsedBy(Value *V, Value *Usr) {
+ for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
+ User *U = *I;
+ if (U != Usr)
+ return false;
+ }
+ return true;
+}
+
+static void RemoveDeadConstant(Constant *C) {
+ assert(C->use_empty() && "Constant is not dead!");
+ SmallPtrSet<Constant*, 4> Operands;
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ if (isa<DerivedType>(C->getOperand(i)->getType()) &&
+ OnlyUsedBy(C->getOperand(i), C))
+ Operands.insert(cast<Constant>(C->getOperand(i)));
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (!GV->hasLocalLinkage()) return; // Don't delete non static globals.
+ GV->eraseFromParent();
+ }
+ else if (!isa<Function>(C))
+ if (isa<CompositeType>(C->getType()))
+ C->destroyConstant();
+
+ // If the constant referenced anything, see if we can delete it as well.
+ for (SmallPtrSet<Constant*, 4>::iterator OI = Operands.begin(),
+ OE = Operands.end(); OI != OE; ++OI)
+ RemoveDeadConstant(*OI);
+}
+
+// Strip the symbol table of its names.
+//
+static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
+ for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) {
+ Value *V = VI->getValue();
+ ++VI;
+ if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) {
+ if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg"))
+ // Set name to "", removing from symbol table!
+ V->setName("");
+ }
+ }
+}
+
+// Strip the symbol table of its names.
+static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) {
+ for (TypeSymbolTable::iterator TI = ST.begin(), E = ST.end(); TI != E; ) {
+ if (PreserveDbgInfo && StringRef(TI->first).startswith("llvm.dbg"))
+ ++TI;
+ else
+ ST.remove(TI++);
+ }
+}
+
+/// Find values that are marked as llvm.used.
+static void findUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ if (LLVMUsed == 0) return;
+ UsedValues.insert(LLVMUsed);
+
+ ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+ if (Inits == 0) return;
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ UsedValues.insert(GV);
+}
+
+/// StripSymbolNames - Strip symbol names.
+static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
+
+ SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
+ findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues);
+ findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues);
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
+ I->setName(""); // Internal symbols can't participate in linkage
+ }
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+ if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
+ I->setName(""); // Internal symbols can't participate in linkage
+ StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
+ }
+
+ // Remove all names from types.
+ StripTypeSymtab(M.getTypeSymbolTable(), PreserveDbgInfo);
+
+ return true;
+}
+
+// StripDebugInfo - Strip debug info in the module if it exists.
+// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and
+// llvm.dbg.region.end calls, and any globals they point to if now dead.
+static bool StripDebugInfo(Module &M) {
+
+ bool Changed = false;
+
+ // Remove all of the calls to the debugger intrinsics, and remove them from
+ // the module.
+ if (Function *Declare = M.getFunction("llvm.dbg.declare")) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->use_back());
+ CI->eraseFromParent();
+ }
+ Declare->eraseFromParent();
+ Changed = true;
+ }
+
+ if (Function *DbgVal = M.getFunction("llvm.dbg.value")) {
+ while (!DbgVal->use_empty()) {
+ CallInst *CI = cast<CallInst>(DbgVal->use_back());
+ CI->eraseFromParent();
+ }
+ DbgVal->eraseFromParent();
+ Changed = true;
+ }
+
+ for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
+ NME = M.named_metadata_end(); NMI != NME;) {
+ NamedMDNode *NMD = NMI;
+ ++NMI;
+ if (NMD->getName().startswith("llvm.dbg.")) {
+ NMD->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
+ for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
+ ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
+ ++BI) {
+ if (!BI->getDebugLoc().isUnknown()) {
+ Changed = true;
+ BI->setDebugLoc(DebugLoc());
+ }
+ }
+
+ return Changed;
+}
+
+bool StripSymbols::runOnModule(Module &M) {
+ bool Changed = false;
+ Changed |= StripDebugInfo(M);
+ if (!OnlyDebugInfo)
+ Changed |= StripSymbolNames(M, false);
+ return Changed;
+}
+
+bool StripNonDebugSymbols::runOnModule(Module &M) {
+ return StripSymbolNames(M, true);
+}
+
+bool StripDebugDeclare::runOnModule(Module &M) {
+
+ Function *Declare = M.getFunction("llvm.dbg.declare");
+ std::vector<Constant*> DeadConstants;
+
+ if (Declare) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->use_back());
+ Value *Arg1 = CI->getArgOperand(0);
+ Value *Arg2 = CI->getArgOperand(1);
+ assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
+ CI->eraseFromParent();
+ if (Arg1->use_empty()) {
+ if (Constant *C = dyn_cast<Constant>(Arg1))
+ DeadConstants.push_back(C);
+ else
+ RecursivelyDeleteTriviallyDeadInstructions(Arg1);
+ }
+ if (Arg2->use_empty())
+ if (Constant *C = dyn_cast<Constant>(Arg2))
+ DeadConstants.push_back(C);
+ }
+ Declare->eraseFromParent();
+ }
+
+ while (!DeadConstants.empty()) {
+ Constant *C = DeadConstants.back();
+ DeadConstants.pop_back();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->hasLocalLinkage())
+ RemoveDeadConstant(GV);
+ } else
+ RemoveDeadConstant(C);
+ }
+
+ return true;
+}
+
+/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
+/// printer to not emit usual symbol prefix before the symbol name is used then
+/// return linkage name after skipping this special LLVM prefix.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
+bool StripDeadDebugInfo::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Debugging infomration is encoded in llvm IR using metadata. This is designed
+ // such a way that debug info for symbols preserved even if symbols are
+ // optimized away by the optimizer. This special pass removes debug info for
+ // such symbols.
+
+ // llvm.dbg.gv keeps track of debug info for global variables.
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
+ SmallVector<MDNode *, 8> MDs;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ if (DIGlobalVariable(NMD->getOperand(i)).Verify())
+ MDs.push_back(NMD->getOperand(i));
+ else
+ Changed = true;
+ NMD->eraseFromParent();
+ NMD = NULL;
+
+ for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
+ E = MDs.end(); I != E; ++I) {
+ GlobalVariable *GV = DIGlobalVariable(*I).getGlobal();
+ if (GV && M.getGlobalVariable(GV->getName(), true)) {
+ if (!NMD)
+ NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(*I);
+ }
+ else
+ Changed = true;
+ }
+ }
+
+ // llvm.dbg.sp keeps track of debug info for subprograms.
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) {
+ SmallVector<MDNode *, 8> MDs;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ if (DISubprogram(NMD->getOperand(i)).Verify())
+ MDs.push_back(NMD->getOperand(i));
+ else
+ Changed = true;
+ NMD->eraseFromParent();
+ NMD = NULL;
+
+ for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
+ E = MDs.end(); I != E; ++I) {
+ bool FnIsLive = false;
+ if (Function *F = DISubprogram(*I).getFunction())
+ if (M.getFunction(F->getName()))
+ FnIsLive = true;
+ if (FnIsLive) {
+ if (!NMD)
+ NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(*I);
+ } else {
+ // Remove llvm.dbg.lv.fnname named mdnode which may have been used
+ // to hold debug info for dead function's local variables.
+ StringRef FName = DISubprogram(*I).getLinkageName();
+ if (FName.empty())
+ FName = DISubprogram(*I).getName();
+ if (NamedMDNode *LVNMD =
+ M.getNamedMetadata(Twine("llvm.dbg.lv.",
+ getRealLinkageName(FName))))
+ LVNMD->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp
new file mode 100644
index 0000000..b82b03f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -0,0 +1,352 @@
+//===-- StructRetPromotion.cpp - Promote sret arguments -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass finds functions that return a struct (using a pointer to the struct
+// as the first argument of the function, marked with the 'sret' attribute) and
+// replaces them with a new function that simply returns each of the elements of
+// that struct (using multiple return values).
+//
+// This pass works under a number of conditions:
+// 1. The returned struct must not contain other structs
+// 2. The returned struct must only be used to load values from
+// 3. The placeholder struct passed in is the result of an alloca
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sretpromotion"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumRejectedSRETUses , "Number of sret rejected due to unexpected uses");
+STATISTIC(NumSRET , "Number of sret promoted");
+namespace {
+ /// SRETPromotion - This pass removes sret parameter and updates
+ /// function to use multiple return value.
+ ///
+ struct SRETPromotion : public CallGraphSCCPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnSCC(CallGraphSCC &SCC);
+ static char ID; // Pass identification, replacement for typeid
+ SRETPromotion() : CallGraphSCCPass(ID) {}
+
+ private:
+ CallGraphNode *PromoteReturn(CallGraphNode *CGN);
+ bool isSafeToUpdateAllCallers(Function *F);
+ Function *cloneFunctionBody(Function *F, const StructType *STy);
+ CallGraphNode *updateCallSites(Function *F, Function *NF);
+ };
+}
+
+char SRETPromotion::ID = 0;
+INITIALIZE_PASS(SRETPromotion, "sretpromotion",
+ "Promote sret arguments to multiple ret values", false, false);
+
+Pass *llvm::createStructRetPromotionPass() {
+ return new SRETPromotion();
+}
+
+bool SRETPromotion::runOnSCC(CallGraphSCC &SCC) {
+ bool Changed = false;
+
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ if (CallGraphNode *NewNode = PromoteReturn(*I)) {
+ SCC.ReplaceNode(*I, NewNode);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// PromoteReturn - This method promotes function that uses StructRet paramater
+/// into a function that uses multiple return values.
+CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
+ Function *F = CGN->getFunction();
+
+ if (!F || F->isDeclaration() || !F->hasLocalLinkage())
+ return 0;
+
+ // Make sure that function returns struct.
+ if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn())
+ return 0;
+
+ DEBUG(dbgs() << "SretPromotion: Looking at sret function "
+ << F->getName() << "\n");
+
+ assert(F->getReturnType()->isVoidTy() && "Invalid function return type");
+ Function::arg_iterator AI = F->arg_begin();
+ const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType());
+ assert(FArgType && "Invalid sret parameter type");
+ const llvm::StructType *STy =
+ dyn_cast<StructType>(FArgType->getElementType());
+ assert(STy && "Invalid sret parameter element type");
+
+ // Check if it is ok to perform this promotion.
+ if (isSafeToUpdateAllCallers(F) == false) {
+ DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n");
+ ++NumRejectedSRETUses;
+ return 0;
+ }
+
+ DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n");
+ ++NumSRET;
+ // [1] Replace use of sret parameter
+ AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv",
+ F->getEntryBlock().begin());
+ Value *NFirstArg = F->arg_begin();
+ NFirstArg->replaceAllUsesWith(TheAlloca);
+
+ // [2] Find and replace ret instructions
+ for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
+ for(BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
+ Instruction *I = BI;
+ ++BI;
+ if (isa<ReturnInst>(I)) {
+ Value *NV = new LoadInst(TheAlloca, "mrv.ld", I);
+ ReturnInst *NR = ReturnInst::Create(F->getContext(), NV, I);
+ I->replaceAllUsesWith(NR);
+ I->eraseFromParent();
+ }
+ }
+
+ // [3] Create the new function body and insert it into the module.
+ Function *NF = cloneFunctionBody(F, STy);
+
+ // [4] Update all call sites to use new function
+ CallGraphNode *NF_CFN = updateCallSites(F, NF);
+
+ CallGraph &CG = getAnalysis<CallGraph>();
+ NF_CFN->stealCalledFunctionsFrom(CG[F]);
+
+ delete CG.removeFunctionFromModule(F);
+ return NF_CFN;
+}
+
+// Check if it is ok to perform this promotion.
+bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) {
+
+ if (F->use_empty())
+ // No users. OK to modify signature.
+ return true;
+
+ for (Value::use_iterator FnUseI = F->use_begin(), FnUseE = F->use_end();
+ FnUseI != FnUseE; ++FnUseI) {
+ // The function is passed in as an argument to (possibly) another function,
+ // we can't change it!
+ CallSite CS(*FnUseI);
+ Instruction *Call = CS.getInstruction();
+ // The function is used by something else than a call or invoke instruction,
+ // we can't change it!
+ if (!Call || !CS.isCallee(FnUseI))
+ return false;
+ CallSite::arg_iterator AI = CS.arg_begin();
+ Value *FirstArg = *AI;
+
+ if (!isa<AllocaInst>(FirstArg))
+ return false;
+
+ // Check FirstArg's users.
+ for (Value::use_iterator ArgI = FirstArg->use_begin(),
+ ArgE = FirstArg->use_end(); ArgI != ArgE; ++ArgI) {
+ User *U = *ArgI;
+ // If FirstArg user is a CallInst that does not correspond to current
+ // call site then this function F is not suitable for sret promotion.
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ if (CI != Call)
+ return false;
+ }
+ // If FirstArg user is a GEP whose all users are not LoadInst then
+ // this function F is not suitable for sret promotion.
+ else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // TODO : Use dom info and insert PHINodes to collect get results
+ // from multiple call sites for this GEP.
+ if (GEP->getParent() != Call->getParent())
+ return false;
+ for (Value::use_iterator GEPI = GEP->use_begin(), GEPE = GEP->use_end();
+ GEPI != GEPE; ++GEPI)
+ if (!isa<LoadInst>(*GEPI))
+ return false;
+ }
+ // Any other FirstArg users make this function unsuitable for sret
+ // promotion.
+ else
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// cloneFunctionBody - Create a new function based on F and
+/// insert it into module. Remove first argument. Use STy as
+/// the return type for new function.
+Function *SRETPromotion::cloneFunctionBody(Function *F,
+ const StructType *STy) {
+
+ const FunctionType *FTy = F->getFunctionType();
+ std::vector<const Type*> Params;
+
+ // Attributes - Keep track of the parameter attributes for the arguments.
+ SmallVector<AttributeWithIndex, 8> AttributesVec;
+ const AttrListPtr &PAL = F->getAttributes();
+
+ // Add any return attributes.
+ if (Attributes attrs = PAL.getRetAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+
+ // Skip first argument.
+ Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ ++I;
+ // 0th parameter attribute is reserved for return type.
+ // 1th parameter attribute is for first 1st sret argument.
+ unsigned ParamIndex = 2;
+ while (I != E) {
+ Params.push_back(I->getType());
+ if (Attributes Attrs = PAL.getParamAttributes(ParamIndex))
+ AttributesVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs));
+ ++I;
+ ++ParamIndex;
+ }
+
+ // Add any fn attributes.
+ if (Attributes attrs = PAL.getFnAttributes())
+ AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+
+
+ FunctionType *NFTy = FunctionType::get(STy, Params, FTy->isVarArg());
+ Function *NF = Function::Create(NFTy, F->getLinkage());
+ NF->takeName(F);
+ NF->copyAttributesFrom(F);
+ NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()));
+ F->getParent()->getFunctionList().insert(F, NF);
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+ // Replace arguments
+ I = F->arg_begin();
+ E = F->arg_end();
+ Function::arg_iterator NI = NF->arg_begin();
+ ++I;
+ while (I != E) {
+ I->replaceAllUsesWith(NI);
+ NI->takeName(I);
+ ++I;
+ ++NI;
+ }
+
+ return NF;
+}
+
+/// updateCallSites - Update all sites that call F to use NF.
+CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+ SmallVector<Value*, 16> Args;
+
+ // Attributes - Keep track of the parameter attributes for the arguments.
+ SmallVector<AttributeWithIndex, 8> ArgAttrsVec;
+
+ // Get a new callgraph node for NF.
+ CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
+
+ while (!F->use_empty()) {
+ CallSite CS(*F->use_begin());
+ Instruction *Call = CS.getInstruction();
+
+ const AttrListPtr &PAL = F->getAttributes();
+ // Add any return attributes.
+ if (Attributes attrs = PAL.getRetAttributes())
+ ArgAttrsVec.push_back(AttributeWithIndex::get(0, attrs));
+
+ // Copy arguments, however skip first one.
+ CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ Value *FirstCArg = *AI;
+ ++AI;
+ // 0th parameter attribute is reserved for return type.
+ // 1th parameter attribute is for first 1st sret argument.
+ unsigned ParamIndex = 2;
+ while (AI != AE) {
+ Args.push_back(*AI);
+ if (Attributes Attrs = PAL.getParamAttributes(ParamIndex))
+ ArgAttrsVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs));
+ ++ParamIndex;
+ ++AI;
+ }
+
+ // Add any function attributes.
+ if (Attributes attrs = PAL.getFnAttributes())
+ ArgAttrsVec.push_back(AttributeWithIndex::get(~0, attrs));
+
+ AttrListPtr NewPAL = AttrListPtr::get(ArgAttrsVec.begin(), ArgAttrsVec.end());
+
+ // Build new call instruction.
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args.begin(), Args.end(), "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(NewPAL);
+ } else {
+ New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(NewPAL);
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ Args.clear();
+ ArgAttrsVec.clear();
+ New->takeName(Call);
+
+ // Update the callgraph to know that the callsite has been transformed.
+ CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
+ CalleeNode->removeCallEdgeFor(Call);
+ CalleeNode->addCalledFunction(New, NF_CGN);
+
+ // Update all users of sret parameter to extract value using extractvalue.
+ for (Value::use_iterator UI = FirstCArg->use_begin(),
+ UE = FirstCArg->use_end(); UI != UE; ) {
+ User *U2 = *UI++;
+ CallInst *C2 = dyn_cast<CallInst>(U2);
+ if (C2 && (C2 == Call))
+ continue;
+
+ GetElementPtrInst *UGEP = cast<GetElementPtrInst>(U2);
+ ConstantInt *Idx = cast<ConstantInt>(UGEP->getOperand(2));
+ Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(),
+ "evi", UGEP);
+ while(!UGEP->use_empty()) {
+ // isSafeToUpdateAllCallers has checked that all GEP uses are
+ // LoadInsts
+ LoadInst *L = cast<LoadInst>(*UGEP->use_begin());
+ L->replaceAllUsesWith(GR);
+ L->eraseFromParent();
+ }
+ UGEP->eraseFromParent();
+ continue;
+ }
+ Call->eraseFromParent();
+ }
+
+ return NF_CGN;
+}
+
diff --git a/contrib/llvm/lib/Transforms/InstCombine/CMakeLists.txt b/contrib/llvm/lib/Transforms/InstCombine/CMakeLists.txt
new file mode 100644
index 0000000..5b1ff3e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/CMakeLists.txt
@@ -0,0 +1,17 @@
+add_llvm_library(LLVMInstCombine
+ InstructionCombining.cpp
+ InstCombineAddSub.cpp
+ InstCombineAndOrXor.cpp
+ InstCombineCalls.cpp
+ InstCombineCasts.cpp
+ InstCombineCompares.cpp
+ InstCombineLoadStoreAlloca.cpp
+ InstCombineMulDivRem.cpp
+ InstCombinePHI.cpp
+ InstCombineSelect.cpp
+ InstCombineShifts.cpp
+ InstCombineSimplifyDemanded.cpp
+ InstCombineVectorOps.cpp
+ )
+
+target_link_libraries (LLVMInstCombine LLVMTransformUtils)
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
new file mode 100644
index 0000000..6f9609c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
@@ -0,0 +1,352 @@
+//===- InstCombine.h - Main InstCombine pass definition -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTCOMBINE_INSTCOMBINE_H
+#define INSTCOMBINE_INSTCOMBINE_H
+
+#include "InstCombineWorklist.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/TargetFolder.h"
+
+namespace llvm {
+ class CallSite;
+ class TargetData;
+ class DbgDeclareInst;
+ class MemIntrinsic;
+ class MemSetInst;
+
+/// SelectPatternFlavor - We can match a variety of different patterns for
+/// select operations.
+enum SelectPatternFlavor {
+ SPF_UNKNOWN = 0,
+ SPF_SMIN, SPF_UMIN,
+ SPF_SMAX, SPF_UMAX
+ //SPF_ABS - TODO.
+};
+
+/// getComplexity: Assign a complexity or rank value to LLVM Values...
+/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
+static inline unsigned getComplexity(Value *V) {
+ if (isa<Instruction>(V)) {
+ if (BinaryOperator::isNeg(V) ||
+ BinaryOperator::isFNeg(V) ||
+ BinaryOperator::isNot(V))
+ return 3;
+ return 4;
+ }
+ if (isa<Argument>(V)) return 3;
+ return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
+}
+
+
+/// InstCombineIRInserter - This is an IRBuilder insertion helper that works
+/// just like the normal insertion helper, but also adds any new instructions
+/// to the instcombine worklist.
+class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
+ : public IRBuilderDefaultInserter<true> {
+ InstCombineWorklist &Worklist;
+public:
+ InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
+
+ void InsertHelper(Instruction *I, const Twine &Name,
+ BasicBlock *BB, BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
+ Worklist.Add(I);
+ }
+};
+
+/// InstCombiner - The -instcombine pass.
+class LLVM_LIBRARY_VISIBILITY InstCombiner
+ : public FunctionPass,
+ public InstVisitor<InstCombiner, Instruction*> {
+ TargetData *TD;
+ bool MustPreserveLCSSA;
+ bool MadeIRChange;
+public:
+ /// Worklist - All of the instructions that need to be simplified.
+ InstCombineWorklist Worklist;
+
+ /// Builder - This is an IRBuilder that automatically inserts new
+ /// instructions into the worklist when they are created.
+ typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
+ BuilderTy *Builder;
+
+ static char ID; // Pass identification, replacement for typeid
+ InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {}
+
+public:
+ virtual bool runOnFunction(Function &F);
+
+ bool DoOneIteration(Function &F, unsigned ItNum);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ TargetData *getTargetData() const { return TD; }
+
+ // Visitation implementation - Implement instruction combining for different
+ // instruction types. The semantics are as follows:
+ // Return Value:
+ // null - No change was made
+ // I - Change was made, I is still valid, I may be dead though
+ // otherwise - Change was made, replace I with returned instruction
+ //
+ Instruction *visitAdd(BinaryOperator &I);
+ Instruction *visitFAdd(BinaryOperator &I);
+ Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty);
+ Instruction *visitSub(BinaryOperator &I);
+ Instruction *visitFSub(BinaryOperator &I);
+ Instruction *visitMul(BinaryOperator &I);
+ Instruction *visitFMul(BinaryOperator &I);
+ Instruction *visitURem(BinaryOperator &I);
+ Instruction *visitSRem(BinaryOperator &I);
+ Instruction *visitFRem(BinaryOperator &I);
+ bool SimplifyDivRemOfSelect(BinaryOperator &I);
+ Instruction *commonRemTransforms(BinaryOperator &I);
+ Instruction *commonIRemTransforms(BinaryOperator &I);
+ Instruction *commonDivTransforms(BinaryOperator &I);
+ Instruction *commonIDivTransforms(BinaryOperator &I);
+ Instruction *visitUDiv(BinaryOperator &I);
+ Instruction *visitSDiv(BinaryOperator &I);
+ Instruction *visitFDiv(BinaryOperator &I);
+ Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+ Instruction *visitAnd(BinaryOperator &I);
+ Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+ Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C);
+ Instruction *visitOr (BinaryOperator &I);
+ Instruction *visitXor(BinaryOperator &I);
+ Instruction *visitShl(BinaryOperator &I);
+ Instruction *visitAShr(BinaryOperator &I);
+ Instruction *visitLShr(BinaryOperator &I);
+ Instruction *commonShiftTransforms(BinaryOperator &I);
+ Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
+ Constant *RHSC);
+ Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
+ GlobalVariable *GV, CmpInst &ICI,
+ ConstantInt *AndCst = 0);
+ Instruction *visitFCmpInst(FCmpInst &I);
+ Instruction *visitICmpInst(ICmpInst &I);
+ Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
+ Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+ Instruction *LHS,
+ ConstantInt *RHS);
+ Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS);
+ Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred, Value *TheAdd);
+ Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond, Instruction &I);
+ Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+ BinaryOperator &I);
+ Instruction *commonCastTransforms(CastInst &CI);
+ Instruction *commonPointerCastTransforms(CastInst &CI);
+ Instruction *visitTrunc(TruncInst &CI);
+ Instruction *visitZExt(ZExtInst &CI);
+ Instruction *visitSExt(SExtInst &CI);
+ Instruction *visitFPTrunc(FPTruncInst &CI);
+ Instruction *visitFPExt(CastInst &CI);
+ Instruction *visitFPToUI(FPToUIInst &FI);
+ Instruction *visitFPToSI(FPToSIInst &FI);
+ Instruction *visitUIToFP(CastInst &CI);
+ Instruction *visitSIToFP(CastInst &CI);
+ Instruction *visitPtrToInt(PtrToIntInst &CI);
+ Instruction *visitIntToPtr(IntToPtrInst &CI);
+ Instruction *visitBitCast(BitCastInst &CI);
+ Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+ Instruction *FI);
+ Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
+ Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
+ Value *A, Value *B, Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C);
+ Instruction *visitSelectInst(SelectInst &SI);
+ Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
+ Instruction *visitCallInst(CallInst &CI);
+ Instruction *visitInvokeInst(InvokeInst &II);
+
+ Instruction *SliceUpIllegalIntegerPHI(PHINode &PN);
+ Instruction *visitPHINode(PHINode &PN);
+ Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
+ Instruction *visitAllocaInst(AllocaInst &AI);
+ Instruction *visitMalloc(Instruction &FI);
+ Instruction *visitFree(CallInst &FI);
+ Instruction *visitLoadInst(LoadInst &LI);
+ Instruction *visitStoreInst(StoreInst &SI);
+ Instruction *visitBranchInst(BranchInst &BI);
+ Instruction *visitSwitchInst(SwitchInst &SI);
+ Instruction *visitInsertElementInst(InsertElementInst &IE);
+ Instruction *visitExtractElementInst(ExtractElementInst &EI);
+ Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
+ Instruction *visitExtractValueInst(ExtractValueInst &EV);
+
+ // visitInstruction - Specify what to return for unhandled instructions...
+ Instruction *visitInstruction(Instruction &I) { return 0; }
+
+private:
+ bool ShouldChangeType(const Type *From, const Type *To) const;
+ Value *dyn_castNegVal(Value *V) const;
+ Value *dyn_castFNegVal(Value *V) const;
+ const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices);
+ Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
+
+ /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+ /// results in any code being generated and is interesting to optimize out. If
+ /// the cast can be eliminated by some other simple transformation, we prefer
+ /// to do the simplification first.
+ bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V,
+ const Type *Ty);
+
+ Instruction *visitCallSite(CallSite CS);
+ Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD);
+ bool transformConstExprCastCall(CallSite CS);
+ Instruction *transformCallThroughTrampoline(CallSite CS);
+ Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform = true);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
+ DbgDeclareInst *hasOneUsePlusDeclare(Value *V);
+ Value *EmitGEPOffset(User *GEP);
+
+public:
+ // InsertNewInstBefore - insert an instruction New before instruction Old
+ // in the program. Add the new instruction to the worklist.
+ //
+ Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
+ assert(New && New->getParent() == 0 &&
+ "New instruction already inserted into a basic block!");
+ BasicBlock *BB = Old.getParent();
+ BB->getInstList().insert(&Old, New); // Insert inst
+ Worklist.Add(New);
+ return New;
+ }
+
+ // ReplaceInstUsesWith - This method is to be used when an instruction is
+ // found to be dead, replacable with another preexisting expression. Here
+ // we add all uses of I to the worklist, replace all uses of I with the new
+ // value, then return I, so that the inst combiner will know that I was
+ // modified.
+ //
+ Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
+ Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
+
+ // If we are replacing the instruction with itself, this must be in a
+ // segment of unreachable code, so just clobber the instruction.
+ if (&I == V)
+ V = UndefValue::get(I.getType());
+
+ I.replaceAllUsesWith(V);
+ return &I;
+ }
+
+ // EraseInstFromFunction - When dealing with an instruction that has side
+ // effects or produces a void value, we can't rely on DCE to delete the
+ // instruction. Instead, visit methods should return the value returned by
+ // this function.
+ Instruction *EraseInstFromFunction(Instruction &I) {
+ DEBUG(errs() << "IC: ERASE " << I << '\n');
+
+ assert(I.use_empty() && "Cannot erase instruction that is used!");
+ // Make sure that we reprocess all operands now that we reduced their
+ // use counts.
+ if (I.getNumOperands() < 8) {
+ for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(*i))
+ Worklist.Add(Op);
+ }
+ Worklist.Remove(&I);
+ I.eraseFromParent();
+ MadeIRChange = true;
+ return 0; // Don't do anything with FI
+ }
+
+ void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth = 0) const {
+ return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ }
+
+ bool MaskedValueIsZero(Value *V, const APInt &Mask,
+ unsigned Depth = 0) const {
+ return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
+ }
+ unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const {
+ return llvm::ComputeNumSignBits(Op, TD, Depth);
+ }
+
+private:
+
+ /// SimplifyCommutative - This performs a few simplifications for
+ /// commutative operators.
+ bool SimplifyCommutative(BinaryOperator &I);
+
+ /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
+ /// based on the demanded bits.
+ Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ APInt& KnownZero, APInt& KnownOne,
+ unsigned Depth);
+ bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
+ APInt& KnownZero, APInt& KnownOne,
+ unsigned Depth=0);
+
+ /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
+ /// SimplifyDemandedBits knows about. See if the instruction has any
+ /// properties that allow us to simplify its operands.
+ bool SimplifyDemandedInstructionBits(Instruction &Inst);
+
+ Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt& UndefElts, unsigned Depth = 0);
+
+ // FoldOpIntoPhi - Given a binary operator, cast instruction, or select
+ // which has a PHI node as operand #0, see if we can fold the instruction
+ // into the PHI (which is only possible if all operands to the PHI are
+ // constants).
+ //
+ // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
+ // that would normally be unprofitable because they strongly encourage jump
+ // threading.
+ Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false);
+
+ // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+ // operator and they all are only used by the PHI, PHI together their
+ // inputs, and do the operation once, to the result of the PHI.
+ Instruction *FoldPHIArgOpIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
+
+
+ Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
+ ConstantInt *AndRHS, BinaryOperator &TheAnd);
+
+ Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
+ bool isSub, Instruction &I);
+ Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside);
+ Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
+ Instruction *MatchBSwap(BinaryOperator &I);
+ bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
+ Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
+ Instruction *SimplifyMemSet(MemSetInst *MI);
+
+
+ Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
+
+ unsigned GetOrEnforceKnownAlignment(Value *V,
+ unsigned PrefAlign = 0);
+
+};
+
+
+
+} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
new file mode 100644
index 0000000..4d2c89e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -0,0 +1,731 @@
+//===- InstCombineAddSub.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for add, fadd, sub, and fsub.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// AddOne - Add one to a ConstantInt.
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a ConstantInt.
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue()-1);
+}
+
+
+// dyn_castFoldableMul - If this value is a multiply that can be folded into
+// other computations (because it has a constant operand), return the
+// non-constant operand of the multiply, and set CST to point to the multiplier.
+// Otherwise, return null.
+//
+static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
+ if (!V->hasOneUse() || !V->getType()->isIntegerTy())
+ return 0;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return 0;
+
+ if (I->getOpcode() == Instruction::Mul)
+ if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
+ return I->getOperand(0);
+ if (I->getOpcode() == Instruction::Shl)
+ if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) {
+ // The multiplier is really 1 << CST.
+ uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ uint32_t CSTVal = CST->getLimitedValue(BitWidth);
+ CST = ConstantInt::get(V->getType()->getContext(),
+ APInt(BitWidth, 1).shl(CSTVal));
+ return I->getOperand(0);
+ }
+ return 0;
+}
+
+
+/// WillNotOverflowSignedAdd - Return true if we can prove that:
+/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
+/// This basically requires proving that the add in the original type would not
+/// overflow to change the sign bit or have a carry out.
+bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
+ // There are different heuristics we can use for this. Here are some simple
+ // ones.
+
+ // Add has the property that adding any two 2's complement numbers can only
+ // have one carry bit which can change a sign. As such, if LHS and RHS each
+ // have at least two sign bits, we know that the addition of the two values
+ // will sign extend fine.
+ if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
+ return true;
+
+
+ // If one of the operands only has one non-zero bit, and if the other operand
+ // has a known-zero bit in a more significant place than it (not including the
+ // sign bit) the ripple may go up to and fill the zero, but won't change the
+ // sign. For example, (X & ~4) + 1.
+
+ // TODO: Implement.
+
+ return false;
+}
+
+Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
+ // X + (signbit) --> X ^ signbit
+ const APInt& Val = CI->getValue();
+ uint32_t BitWidth = Val.getBitWidth();
+ if (Val == APInt::getSignBit(BitWidth))
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ // See if SimplifyDemandedBits can simplify this. This handles stuff like
+ // (X & 254)+1 -> (X&254)|1
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // zext(bool) + C -> bool ? C + 1 : C
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
+ if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
+ return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
+ }
+
+ if (isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ ConstantInt *XorRHS = 0;
+ Value *XorLHS = 0;
+ if (isa<ConstantInt>(RHSC) &&
+ match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
+ uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
+ const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();
+ unsigned ExtendAmt = 0;
+ // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
+ // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
+ if (XorRHS->getValue() == -RHSVal) {
+ if (RHSVal.isPowerOf2())
+ ExtendAmt = TySizeBits - RHSVal.logBase2() - 1;
+ else if (XorRHS->getValue().isPowerOf2())
+ ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
+ }
+
+ if (ExtendAmt) {
+ APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
+ if (!MaskedValueIsZero(XorLHS, Mask))
+ ExtendAmt = 0;
+ }
+
+ if (ExtendAmt) {
+ Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
+ Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
+ return BinaryOperator::CreateAShr(NewShl, ShAmt);
+ }
+ }
+ }
+
+ if (I.getType()->isIntegerTy(1))
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ if (I.getType()->isIntegerTy()) {
+ // X + X --> X << 1
+ if (LHS == RHS)
+ return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
+
+ if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
+ if (RHSI->getOpcode() == Instruction::Sub)
+ if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B
+ return ReplaceInstUsesWith(I, RHSI->getOperand(0));
+ }
+ if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
+ if (LHSI->getOpcode() == Instruction::Sub)
+ if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B
+ return ReplaceInstUsesWith(I, LHSI->getOperand(0));
+ }
+ }
+
+ // -A + B --> B - A
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castNegVal(LHS)) {
+ if (LHS->getType()->isIntOrIntVectorTy()) {
+ if (Value *RHSV = dyn_castNegVal(RHS)) {
+ Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
+ return BinaryOperator::CreateNeg(NewAdd);
+ }
+ }
+
+ return BinaryOperator::CreateSub(RHS, LHSV);
+ }
+
+ // A + -B --> A - B
+ if (!isa<Constant>(RHS))
+ if (Value *V = dyn_castNegVal(RHS))
+ return BinaryOperator::CreateSub(LHS, V);
+
+
+ ConstantInt *C2;
+ if (Value *X = dyn_castFoldableMul(LHS, C2)) {
+ if (X == RHS) // X*C + X --> X * (C+1)
+ return BinaryOperator::CreateMul(RHS, AddOne(C2));
+
+ // X*C1 + X*C2 --> X * (C1+C2)
+ ConstantInt *C1;
+ if (X == dyn_castFoldableMul(RHS, C1))
+ return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
+ }
+
+ // X + X*C --> X * (C+1)
+ if (dyn_castFoldableMul(RHS, C2) == LHS)
+ return BinaryOperator::CreateMul(LHS, AddOne(C2));
+
+ // X + ~X --> -1 since ~X = -X-1
+ if (match(LHS, m_Not(m_Specific(RHS))) ||
+ match(RHS, m_Not(m_Specific(LHS))))
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+ // A+B --> A|B iff A and B have no bits set in common.
+ if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
+ APInt LHSKnownOne(IT->getBitWidth(), 0);
+ APInt LHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ if (LHSKnownZero != 0) {
+ APInt RHSKnownOne(IT->getBitWidth(), 0);
+ APInt RHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+
+ // No bits in common -> bitwise or.
+ if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
+ return BinaryOperator::CreateOr(LHS, RHS);
+ }
+ }
+
+ // W*X + Y*Z --> W * (X+Z) iff W == Y
+ if (I.getType()->isIntOrIntVectorTy()) {
+ Value *W, *X, *Y, *Z;
+ if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
+ match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
+ if (W != Y) {
+ if (W == Z) {
+ std::swap(Y, Z);
+ } else if (Y == X) {
+ std::swap(W, X);
+ } else if (X == Z) {
+ std::swap(Y, Z);
+ std::swap(W, X);
+ }
+ }
+
+ if (W == Y) {
+ Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
+ return BinaryOperator::CreateMul(W, NewAdd);
+ }
+ }
+ }
+
+ if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
+ Value *X = 0;
+ if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
+ return BinaryOperator::CreateSub(SubOne(CRHS), X);
+
+ // (X & FF00) + xx00 -> (X+xx00) & FF00
+ if (LHS->hasOneUse() &&
+ match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
+ Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
+ if (Anded == CRHS) {
+ // See if all bits from the first bit set in the Add RHS up are included
+ // in the mask. First, get the rightmost bit.
+ const APInt &AddRHSV = CRHS->getValue();
+
+ // Form a mask of all bits from the lowest bit added through the top.
+ APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
+
+ // See if the and mask includes all of these bits.
+ APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
+
+ if (AddRHSHighBits == AddRHSHighBitsAnd) {
+ // Okay, the xform is safe. Insert the new add pronto.
+ Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
+ return BinaryOperator::CreateAnd(NewAdd, C2);
+ }
+ }
+ }
+
+ // Try to fold constant add into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ }
+
+ // add (select X 0 (sub n A)) A --> select X A n
+ {
+ SelectInst *SI = dyn_cast<SelectInst>(LHS);
+ Value *A = RHS;
+ if (!SI) {
+ SI = dyn_cast<SelectInst>(RHS);
+ A = LHS;
+ }
+ if (SI && SI->hasOneUse()) {
+ Value *TV = SI->getTrueValue();
+ Value *FV = SI->getFalseValue();
+ Value *N;
+
+ // Can we fold the add into the argument of the select?
+ // We check both true and false select arguments for a matching subtract.
+ if (match(FV, m_Zero()) &&
+ match(TV, m_Sub(m_Value(N), m_Specific(A))))
+ // Fold the add into the true select value.
+ return SelectInst::Create(SI->getCondition(), N, A);
+ if (match(TV, m_Zero()) &&
+ match(FV, m_Sub(m_Value(N), m_Specific(A))))
+ // Fold the add into the false select value.
+ return SelectInst::Create(SI->getCondition(), A, N);
+ }
+ }
+
+ // Check for (add (sext x), y), see if we can merge this into an
+ // integer add followed by a sext.
+ if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
+ // (add (sext x), cst) --> (sext (add x, cst'))
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new, smaller add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+
+ // (add (sext x), (sext y)) --> (sext (add int x, y))
+ if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
+ // Only do this if x/y have the same type, if at last one of them has a
+ // single use (so we don't increase the number of sexts), and if the
+ // integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0), "addconv");
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // X + 0 --> X
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->isExactlyValue(ConstantFP::getNegativeZero
+ (I.getType())->getValueAPF()))
+ return ReplaceInstUsesWith(I, LHS);
+ }
+
+ if (isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ // -A + B --> B - A
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castFNegVal(LHS))
+ return BinaryOperator::CreateFSub(RHS, LHSV);
+
+ // A + -B --> A - B
+ if (!isa<Constant>(RHS))
+ if (Value *V = dyn_castFNegVal(RHS))
+ return BinaryOperator::CreateFSub(LHS, V);
+
+ // Check for X+0.0. Simplify it to X if we know X is not -0.0.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
+ if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
+ return ReplaceInstUsesWith(I, LHS);
+
+ // Check for (fadd double (sitofp x), y), see if we can merge this into an
+ // integer add followed by a promotion.
+ if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
+ // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
+ // ... if the constant fits in the integer value. This is useful for things
+ // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
+ // requires a constant pool load, and generally allows the add to be better
+ // instcombined.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+
+ // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
+ if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
+ // Only do this if x/y have the same type, if at last one of them has a
+ // single use (so we don't increase the number of int->fp conversions),
+ // and if the integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0),"addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+
+/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
+/// code necessary to compute the offset from the base pointer (without adding
+/// in the base pointer). Return the result as a signed integer of intptr size.
+Value *InstCombiner::EmitGEPOffset(User *GEP) {
+ TargetData &TD = *getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
+ Value *Result = Constant::getNullValue(IntPtrTy);
+
+ // Build a mask for high order bits.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
+ ++i, ++GTI) {
+ Value *Op = *i;
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
+ if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+
+ Result = Builder->CreateAdd(Result,
+ ConstantInt::get(IntPtrTy, Size),
+ GEP->getName()+".offs");
+ continue;
+ }
+
+ Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+ Constant *OC =
+ ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+ Scale = ConstantExpr::getMul(OC, Scale);
+ // Emit an add instruction.
+ Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
+ continue;
+ }
+ // Convert to correct type.
+ if (Op->getType() != IntPtrTy)
+ Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
+ if (Size != 1) {
+ Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+ // We'll let instcombine(mul) convert this to a shl if possible.
+ Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
+ }
+
+ // Emit an add instruction.
+ Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
+ }
+ return Result;
+}
+
+
+
+
+/// Optimize pointer differences into the same array into a size. Consider:
+/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
+/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
+///
+Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
+ const Type *Ty) {
+ assert(TD && "Must have target data info for this");
+
+ // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
+ // this.
+ bool Swapped = false;
+ GetElementPtrInst *GEP = 0;
+ ConstantExpr *CstGEP = 0;
+
+ // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo".
+ // For now we require one side to be the base pointer "A" or a constant
+ // expression derived from it.
+ if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) {
+ // (gep X, ...) - X
+ if (LHSGEP->getOperand(0) == RHS) {
+ GEP = LHSGEP;
+ Swapped = false;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) {
+ // (gep X, ...) - (ce_gep X, ...)
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ LHSGEP->getOperand(0) == CE->getOperand(0)) {
+ CstGEP = CE;
+ GEP = LHSGEP;
+ Swapped = false;
+ }
+ }
+ }
+
+ if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) {
+ // X - (gep X, ...)
+ if (RHSGEP->getOperand(0) == LHS) {
+ GEP = RHSGEP;
+ Swapped = true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) {
+ // (ce_gep X, ...) - (gep X, ...)
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ RHSGEP->getOperand(0) == CE->getOperand(0)) {
+ CstGEP = CE;
+ GEP = RHSGEP;
+ Swapped = true;
+ }
+ }
+ }
+
+ if (GEP == 0)
+ return 0;
+
+ // Emit the offset of the GEP and an intptr_t.
+ Value *Result = EmitGEPOffset(GEP);
+
+ // If we had a constant expression GEP on the other side offsetting the
+ // pointer, subtract it from the offset we have.
+ if (CstGEP) {
+ Value *CstOffset = EmitGEPOffset(CstGEP);
+ Result = Builder->CreateSub(Result, CstOffset);
+ }
+
+
+ // If we have p - gep(p, ...) then we have to negate the result.
+ if (Swapped)
+ Result = Builder->CreateNeg(Result, "diff.neg");
+
+ return Builder->CreateIntCast(Result, Ty, true);
+}
+
+
+Instruction *InstCombiner::visitSub(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Op0 == Op1) // sub X, X -> 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW.
+ if (Value *V = dyn_castNegVal(Op1)) {
+ BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
+ Res->setHasNoSignedWrap(I.hasNoSignedWrap());
+ Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return Res;
+ }
+
+ if (isa<UndefValue>(Op0))
+ return ReplaceInstUsesWith(I, Op0); // undef - X -> undef
+ if (isa<UndefValue>(Op1))
+ return ReplaceInstUsesWith(I, Op1); // X - undef -> undef
+ if (I.getType()->isIntegerTy(1))
+ return BinaryOperator::CreateXor(Op0, Op1);
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
+ // Replace (-1 - A) with (~A).
+ if (C->isAllOnesValue())
+ return BinaryOperator::CreateNot(Op1);
+
+ // C - ~X == X + (1+C)
+ Value *X = 0;
+ if (match(Op1, m_Not(m_Value(X))))
+ return BinaryOperator::CreateAdd(X, AddOne(C));
+
+ // -(X >>u 31) -> (X >>s 31)
+ // -(X >>s 31) -> (X >>u 31)
+ if (C->isZero()) {
+ if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
+ if (SI->getOpcode() == Instruction::LShr) {
+ if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
+ // Check to see if we are shifting out everything but the sign bit.
+ if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
+ SI->getType()->getPrimitiveSizeInBits()-1) {
+ // Ok, the transformation is safe. Insert AShr.
+ return BinaryOperator::Create(Instruction::AShr,
+ SI->getOperand(0), CU, SI->getName());
+ }
+ }
+ } else if (SI->getOpcode() == Instruction::AShr) {
+ if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
+ // Check to see if we are shifting out everything but the sign bit.
+ if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
+ SI->getType()->getPrimitiveSizeInBits()-1) {
+ // Ok, the transformation is safe. Insert LShr.
+ return BinaryOperator::CreateLShr(
+ SI->getOperand(0), CU, SI->getName());
+ }
+ }
+ }
+ }
+ }
+
+ // Try to fold constant sub into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ // C - zext(bool) -> bool ? C - 1 : C
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
+ if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
+ return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
+ }
+
+ if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
+ if (Op1I->getOpcode() == Instruction::Add) {
+ if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
+ return BinaryOperator::CreateNeg(Op1I->getOperand(1),
+ I.getName());
+ else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
+ return BinaryOperator::CreateNeg(Op1I->getOperand(0),
+ I.getName());
+ else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
+ // C1-(X+C2) --> (C1-C2)-X
+ return BinaryOperator::CreateSub(
+ ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));
+ }
+ }
+
+ if (Op1I->hasOneUse()) {
+ // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
+ // is not used by anyone else...
+ //
+ if (Op1I->getOpcode() == Instruction::Sub) {
+ // Swap the two operands of the subexpr...
+ Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
+ Op1I->setOperand(0, IIOp1);
+ Op1I->setOperand(1, IIOp0);
+
+ // Create the new top level add instruction...
+ return BinaryOperator::CreateAdd(Op0, Op1);
+ }
+
+ // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
+ //
+ if (Op1I->getOpcode() == Instruction::And &&
+ (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
+ Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);
+
+ Value *NewNot = Builder->CreateNot(OtherOp, "B.not");
+ return BinaryOperator::CreateAnd(Op0, NewNot);
+ }
+
+ // 0 - (X sdiv C) -> (X sdiv -C)
+ if (Op1I->getOpcode() == Instruction::SDiv)
+ if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
+ if (CSI->isZero())
+ if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
+ return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
+ ConstantExpr::getNeg(DivRHS));
+
+ // 0 - (C << X) -> (-C << X)
+ if (Op1I->getOpcode() == Instruction::Shl)
+ if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
+ if (CSI->isZero())
+ if (Value *ShlLHSNeg = dyn_castNegVal(Op1I->getOperand(0)))
+ return BinaryOperator::CreateShl(ShlLHSNeg, Op1I->getOperand(1));
+
+ // X - X*C --> X * (1-C)
+ ConstantInt *C2 = 0;
+ if (dyn_castFoldableMul(Op1I, C2) == Op0) {
+ Constant *CP1 =
+ ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
+ C2);
+ return BinaryOperator::CreateMul(Op0, CP1);
+ }
+ }
+ }
+
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ if (Op0I->getOpcode() == Instruction::Add) {
+ if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X
+ return ReplaceInstUsesWith(I, Op0I->getOperand(1));
+ else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X
+ return ReplaceInstUsesWith(I, Op0I->getOperand(0));
+ } else if (Op0I->getOpcode() == Instruction::Sub) {
+ if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y
+ return BinaryOperator::CreateNeg(Op0I->getOperand(1),
+ I.getName());
+ }
+ }
+
+ ConstantInt *C1;
+ if (Value *X = dyn_castFoldableMul(Op0, C1)) {
+ if (X == Op1) // X*C - X --> X * (C-1)
+ return BinaryOperator::CreateMul(Op1, SubOne(C1));
+
+ ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
+ if (X == dyn_castFoldableMul(Op1, C2))
+ return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
+ }
+
+ // Optimize pointer differences into the same array into a size. Consider:
+ // &A[10] - &A[0]: we should compile this to "10".
+ if (TD) {
+ Value *LHSOp, *RHSOp;
+ if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
+ match(Op1, m_PtrToInt(m_Value(RHSOp))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+
+ // trunc(p)-trunc(q) -> trunc(p-q)
+ if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
+ match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // If this is a 'B = x-(-A)', change to B = x+A...
+ if (Value *V = dyn_castFNegVal(Op1))
+ return BinaryOperator::CreateFAdd(Op0, V);
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
new file mode 100644
index 0000000..19a05bf
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -0,0 +1,1994 @@
+//===- InstCombineAndOrXor.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitAnd, visitOr, and visitXor functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+
+/// AddOne - Add one to a ConstantInt.
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a ConstantInt.
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue()-1);
+}
+
+/// isFreeToInvert - Return true if the specified value is free to invert (apply
+/// ~ to). This happens in cases where the ~ can be eliminated.
+static inline bool isFreeToInvert(Value *V) {
+ // ~(~(X)) -> X.
+ if (BinaryOperator::isNot(V))
+ return true;
+
+ // Constants can be considered to be not'ed values.
+ if (isa<ConstantInt>(V))
+ return true;
+
+ // Compares can be inverted if they have a single use.
+ if (CmpInst *CI = dyn_cast<CmpInst>(V))
+ return CI->hasOneUse();
+
+ return false;
+}
+
+static inline Value *dyn_castNotVal(Value *V) {
+ // If this is not(not(x)) don't return that this is a not: we want the two
+ // not's to be folded first.
+ if (BinaryOperator::isNot(V)) {
+ Value *Operand = BinaryOperator::getNotArgument(V);
+ if (!isFreeToInvert(Operand))
+ return Operand;
+ }
+
+ // Constants can be considered to be not'ed values...
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantInt::get(C->getType(), ~C->getValue());
+ return 0;
+}
+
+
+/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits
+/// are carefully arranged to allow folding of expressions such as:
+///
+/// (A < B) | (A > B) --> (A != B)
+///
+/// Note that this is only valid if the first and second predicates have the
+/// same sign. Is illegal to do: (A u< B) | (A s> B)
+///
+/// Three bits are used to represent the condition, as follows:
+/// 0 A > B
+/// 1 A == B
+/// 2 A < B
+///
+/// <=> Value Definition
+/// 000 0 Always false
+/// 001 1 A > B
+/// 010 2 A == B
+/// 011 3 A >= B
+/// 100 4 A < B
+/// 101 5 A != B
+/// 110 6 A <= B
+/// 111 7 Always true
+///
+static unsigned getICmpCode(const ICmpInst *ICI) {
+ switch (ICI->getPredicate()) {
+ // False -> 0
+ case ICmpInst::ICMP_UGT: return 1; // 001
+ case ICmpInst::ICMP_SGT: return 1; // 001
+ case ICmpInst::ICMP_EQ: return 2; // 010
+ case ICmpInst::ICMP_UGE: return 3; // 011
+ case ICmpInst::ICMP_SGE: return 3; // 011
+ case ICmpInst::ICMP_ULT: return 4; // 100
+ case ICmpInst::ICMP_SLT: return 4; // 100
+ case ICmpInst::ICMP_NE: return 5; // 101
+ case ICmpInst::ICMP_ULE: return 6; // 110
+ case ICmpInst::ICMP_SLE: return 6; // 110
+ // True -> 7
+ default:
+ llvm_unreachable("Invalid ICmp predicate!");
+ return 0;
+ }
+}
+
+/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
+/// predicate into a three bit mask. It also returns whether it is an ordered
+/// predicate by reference.
+static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
+ isOrdered = false;
+ switch (CC) {
+ case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000
+ case FCmpInst::FCMP_UNO: return 0; // 000
+ case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001
+ case FCmpInst::FCMP_UGT: return 1; // 001
+ case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010
+ case FCmpInst::FCMP_UEQ: return 2; // 010
+ case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011
+ case FCmpInst::FCMP_UGE: return 3; // 011
+ case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100
+ case FCmpInst::FCMP_ULT: return 4; // 100
+ case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101
+ case FCmpInst::FCMP_UNE: return 5; // 101
+ case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110
+ case FCmpInst::FCMP_ULE: return 6; // 110
+ // True -> 7
+ default:
+ // Not expecting FCMP_FALSE and FCMP_TRUE;
+ llvm_unreachable("Unexpected FCmp predicate!");
+ return 0;
+ }
+}
+
+/// getICmpValue - This is the complement of getICmpCode, which turns an
+/// opcode and two operands into either a constant true or false, or a brand
+/// new ICmp instruction. The sign is passed in to determine which kind
+/// of predicate to use in the new icmp instruction.
+static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *Builder) {
+ CmpInst::Predicate Pred;
+ switch (Code) {
+ default: assert(0 && "Illegal ICmp code!");
+ case 0: // False.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case 1: Pred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ case 2: Pred = ICmpInst::ICMP_EQ; break;
+ case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 5: Pred = ICmpInst::ICMP_NE; break;
+ case 6: Pred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ case 7: // True.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ }
+ return Builder->CreateICmp(Pred, LHS, RHS);
+}
+
+/// getFCmpValue - This is the complement of getFCmpCode, which turns an
+/// opcode and two operands into either a FCmp instruction. isordered is passed
+/// in to determine which kind of predicate to use in the new fcmp instruction.
+static Value *getFCmpValue(bool isordered, unsigned code,
+ Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *Builder) {
+ CmpInst::Predicate Pred;
+ switch (code) {
+ default: assert(0 && "Illegal FCmp code!");
+ case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break;
+ case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break;
+ case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break;
+ case 3: Pred = isordered ? FCmpInst::FCMP_OGE : FCmpInst::FCMP_UGE; break;
+ case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break;
+ case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
+ case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
+ case 7: return ConstantInt::getTrue(LHS->getContext());
+ }
+ return Builder->CreateFCmp(Pred, LHS, RHS);
+}
+
+/// PredicatesFoldable - Return true if both predicates match sign or if at
+/// least one of them is an equality comparison (which is signless).
+static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
+ return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
+ (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
+ (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
+}
+
+// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where
+// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
+// guaranteed to be a binary operator.
+Instruction *InstCombiner::OptAndOp(Instruction *Op,
+ ConstantInt *OpRHS,
+ ConstantInt *AndRHS,
+ BinaryOperator &TheAnd) {
+ Value *X = Op->getOperand(0);
+ Constant *Together = 0;
+ if (!Op->isShift())
+ Together = ConstantExpr::getAnd(AndRHS, OpRHS);
+
+ switch (Op->getOpcode()) {
+ case Instruction::Xor:
+ if (Op->hasOneUse()) {
+ // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
+ Value *And = Builder->CreateAnd(X, AndRHS);
+ And->takeName(Op);
+ return BinaryOperator::CreateXor(And, Together);
+ }
+ break;
+ case Instruction::Or:
+ if (Together == AndRHS) // (X | C) & C --> C
+ return ReplaceInstUsesWith(TheAnd, AndRHS);
+
+ if (Op->hasOneUse() && Together != OpRHS) {
+ // (X | C1) & C2 --> (X | (C1&C2)) & C2
+ Value *Or = Builder->CreateOr(X, Together);
+ Or->takeName(Op);
+ return BinaryOperator::CreateAnd(Or, AndRHS);
+ }
+ break;
+ case Instruction::Add:
+ if (Op->hasOneUse()) {
+ // Adding a one to a single bit bit-field should be turned into an XOR
+ // of the bit. First thing to check is to see if this AND is with a
+ // single bit constant.
+ const APInt &AndRHSV = cast<ConstantInt>(AndRHS)->getValue();
+
+ // If there is only one bit set.
+ if (AndRHSV.isPowerOf2()) {
+ // Ok, at this point, we know that we are masking the result of the
+ // ADD down to exactly one bit. If the constant we are adding has
+ // no bits set below this bit, then we can eliminate the ADD.
+ const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();
+
+ // Check to see if any bits below the one bit set in AndRHSV are set.
+ if ((AddRHS & (AndRHSV-1)) == 0) {
+ // If not, the only thing that can effect the output of the AND is
+ // the bit specified by AndRHSV. If that bit is set, the effect of
+ // the XOR is to toggle the bit. If it is clear, then the ADD has
+ // no effect.
+ if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
+ TheAnd.setOperand(0, X);
+ return &TheAnd;
+ } else {
+ // Pull the XOR out of the AND.
+ Value *NewAnd = Builder->CreateAnd(X, AndRHS);
+ NewAnd->takeName(Op);
+ return BinaryOperator::CreateXor(NewAnd, AndRHS);
+ }
+ }
+ }
+ }
+ break;
+
+ case Instruction::Shl: {
+ // We know that the AND will not produce any of the bits shifted in, so if
+ // the anded constant includes them, clear them now!
+ //
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
+ ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
+ AndRHS->getValue() & ShlMask);
+
+ if (CI->getValue() == ShlMask) {
+ // Masking out bits that the shift already masks
+ return ReplaceInstUsesWith(TheAnd, Op); // No need for the and.
+ } else if (CI != AndRHS) { // Reducing bits set in and.
+ TheAnd.setOperand(1, CI);
+ return &TheAnd;
+ }
+ break;
+ }
+ case Instruction::LShr: {
+ // We know that the AND will not produce any of the bits shifted in, so if
+ // the anded constant includes them, clear them now! This only applies to
+ // unsigned shifts, because a signed shr may bring in set bits!
+ //
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+ ConstantInt *CI = ConstantInt::get(Op->getContext(),
+ AndRHS->getValue() & ShrMask);
+
+ if (CI->getValue() == ShrMask) {
+ // Masking out bits that the shift already masks.
+ return ReplaceInstUsesWith(TheAnd, Op);
+ } else if (CI != AndRHS) {
+ TheAnd.setOperand(1, CI); // Reduce bits set in and cst.
+ return &TheAnd;
+ }
+ break;
+ }
+ case Instruction::AShr:
+ // Signed shr.
+ // See if this is shifting in some sign extension, then masking it out
+ // with an and.
+ if (Op->hasOneUse()) {
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+ Constant *C = ConstantInt::get(Op->getContext(),
+ AndRHS->getValue() & ShrMask);
+ if (C == AndRHS) { // Masking out bits shifted in.
+ // (Val ashr C1) & C2 -> (Val lshr C1) & C2
+ // Make the argument unsigned.
+ Value *ShVal = Op->getOperand(0);
+ ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
+ return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
+ }
+ }
+ break;
+ }
+ return 0;
+}
+
+
+/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
+/// true, otherwise (V < Lo || V >= Hi). In pratice, we emit the more efficient
+/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
+/// whether to treat the V, Lo and HI as signed or not. IB is the location to
+/// insert new instructions.
+Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside) {
+ assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
+ ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
+ "Lo is not <= Hi in range emission code!");
+
+ if (Inside) {
+ if (Lo == Hi) // Trivially false.
+ return ConstantInt::getFalse(V->getContext());
+
+ // V >= Min && V < Hi --> V < Hi
+ if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+ ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
+ return Builder->CreateICmp(pred, V, Hi);
+ }
+
+ // Emit V-Lo <u Hi-Lo
+ Constant *NegLo = ConstantExpr::getNeg(Lo);
+ Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
+ return Builder->CreateICmpULT(Add, UpperBound);
+ }
+
+ if (Lo == Hi) // Trivially true.
+ return ConstantInt::getTrue(V->getContext());
+
+ // V < Min || V >= Hi -> V > Hi-1
+ Hi = SubOne(cast<ConstantInt>(Hi));
+ if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+ ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
+ return Builder->CreateICmp(pred, V, Hi);
+ }
+
+ // Emit V-Lo >u Hi-1-Lo
+ // Note that Hi has already had one subtracted from it, above.
+ ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
+ Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+ Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
+ return Builder->CreateICmpUGT(Add, LowerBound);
+}
+
+// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
+// any number of 0s on either side. The 1s are allowed to wrap from LSB to
+// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
+// not, since all 1s are not contiguous.
+static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
+ const APInt& V = Val->getValue();
+ uint32_t BitWidth = Val->getType()->getBitWidth();
+ if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
+
+ // look for the first zero bit after the run of ones
+ MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
+ // look for the first non-zero bit
+ ME = V.getActiveBits();
+ return true;
+}
+
+/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
+/// where isSub determines whether the operator is a sub. If we can fold one of
+/// the following xforms:
+///
+/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
+/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+///
+/// return (A +/- B).
+///
+Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
+ ConstantInt *Mask, bool isSub,
+ Instruction &I) {
+ Instruction *LHSI = dyn_cast<Instruction>(LHS);
+ if (!LHSI || LHSI->getNumOperands() != 2 ||
+ !isa<ConstantInt>(LHSI->getOperand(1))) return 0;
+
+ ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
+
+ switch (LHSI->getOpcode()) {
+ default: return 0;
+ case Instruction::And:
+ if (ConstantExpr::getAnd(N, Mask) == Mask) {
+ // If the AndRHS is a power of two minus one (0+1+), this is simple.
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) ==
+ Mask->getValue().getBitWidth())
+ break;
+
+ // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
+ // part, we don't need any explicit masks to take them out of A. If that
+ // is all N is, ignore it.
+ uint32_t MB = 0, ME = 0;
+ if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive
+ uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
+ APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
+ if (MaskedValueIsZero(RHS, Mask))
+ break;
+ }
+ }
+ return 0;
+ case Instruction::Or:
+ case Instruction::Xor:
+ // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
+ && ConstantExpr::getAnd(N, Mask)->isNullValue())
+ break;
+ return 0;
+ }
+
+ if (isSub)
+ return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
+ return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
+}
+
+/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
+Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+
+ // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
+ if (PredicatesFoldable(LHSCC, RHSCC)) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return getICmpValue(isSigned, Code, Op0, Op1, Builder);
+ }
+ }
+
+ // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
+ Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (LHSCst == 0 || RHSCst == 0) return 0;
+
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
+ // where C is a power of 2
+ if (LHSCC == ICmpInst::ICMP_ULT &&
+ LHSCst->getValue().isPowerOf2()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) -->
+ // (icmp eq (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT
+ if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+ Value *Op1 = 0, *Op2 = 0;
+ ConstantInt *CI1 = 0, *CI2 = 0;
+ if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) &&
+ match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) {
+ if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() &&
+ CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) {
+ Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
+ Value *NewAnd = Builder->CreateAnd(Op1, ConstOr);
+ return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr);
+ }
+ }
+ }
+ }
+
+ // From here on, we only handle:
+ // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
+ if (Val != Val2) return 0;
+
+ // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+ if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+ RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+ RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ return 0;
+
+ // We can't fold (ugt x, C) & (sgt x, C2).
+ if (!PredicatesFoldable(LHSCC, RHSCC))
+ return 0;
+
+ // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap;
+ if (CmpInst::isSigned(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ CmpInst::isSigned(RHSCC)))
+ ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ else
+ ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+ if (ShouldSwap) {
+ std::swap(LHS, RHS);
+ std::swap(LHSCst, RHSCst);
+ std::swap(LHSCC, RHSCC);
+ }
+
+ // At this point, we know we have two icmp instructions
+ // comparing a value against two constants and and'ing the result
+ // together. Because of the above check, we know that we only have
+ // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know
+ // (from the icmp folding check above), that the two constants
+ // are not equal and that the larger constant is on the RHS
+ assert(LHSCst != RHSCst && "Compares not folded above?");
+
+ switch (LHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false
+ case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false
+ case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
+ case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
+ case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
+ return LHS;
+ }
+ case ICmpInst::ICMP_NE:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_ULT:
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
+ return Builder->CreateICmpULT(Val, LHSCst);
+ break; // (X != 13 & X u< 15) -> no change
+ case ICmpInst::ICMP_SLT:
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
+ return Builder->CreateICmpSLT(Val, LHSCst);
+ break; // (X != 13 & X s< 15) -> no change
+ case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15
+ case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
+ return RHS;
+ case ICmpInst::ICMP_NE:
+ if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1));
+ }
+ break; // (X != 13 & X != 15) -> no change
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
+ case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
+ case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13
+ return LHS;
+ case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false
+ case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
+ case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13
+ return LHS;
+ case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
+ return RHS;
+ case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE:
+ if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
+ return Builder->CreateICmp(LHSCC, Val, RHSCst);
+ break; // (X u> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true);
+ case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
+ return RHS;
+ case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE:
+ if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
+ return Builder->CreateICmp(LHSCC, Val, RHSCst);
+ break; // (X s> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true);
+ case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of
+/// instcombine, this returns a Value which should already be inserted into the
+/// function.
+Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+ if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
+ RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+ // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // false.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ConstantInt::getFalse(LHS->getContext());
+ return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp ord x,x" is "fcmp ord x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
+ return 0;
+ }
+
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
+ if (Op0CC == Op1CC)
+ return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
+ if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ if (Op0CC == FCmpInst::FCMP_TRUE)
+ return RHS;
+ if (Op1CC == FCmpInst::FCMP_TRUE)
+ return LHS;
+
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ if (Op1Pred == 0) {
+ std::swap(LHS, RHS);
+ std::swap(Op0Pred, Op1Pred);
+ std::swap(Op0Ordered, Op1Ordered);
+ }
+ if (Op0Pred == 0) {
+ // uno && ueq -> uno && (uno || eq) -> ueq
+ // ord && olt -> ord && (ord && lt) -> olt
+ if (Op0Ordered == Op1Ordered)
+ return RHS;
+
+ // uno && oeq -> uno && (ord && eq) -> false
+ // uno && ord -> false
+ if (!Op0Ordered)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ // ord && ueq -> ord && (uno || eq) -> oeq
+ return getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS, Builder);
+ }
+ }
+
+ return 0;
+}
+
+
+Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
+ const APInt &AndRHSMask = AndRHS->getValue();
+ APInt NotAndRHS(~AndRHSMask);
+
+ // Optimize a variety of ((val OP C1) & C2) combinations...
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ Value *Op0LHS = Op0I->getOperand(0);
+ Value *Op0RHS = Op0I->getOperand(1);
+ switch (Op0I->getOpcode()) {
+ default: break;
+ case Instruction::Xor:
+ case Instruction::Or:
+ // If the mask is only needed on one incoming arm, push it up.
+ if (!Op0I->hasOneUse()) break;
+
+ if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
+ // Not masking anything out for the LHS, move to RHS.
+ Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
+ Op0RHS->getName()+".masked");
+ return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
+ }
+ if (!isa<Constant>(Op0RHS) &&
+ MaskedValueIsZero(Op0RHS, NotAndRHS)) {
+ // Not masking anything out for the RHS, move to LHS.
+ Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
+ Op0LHS->getName()+".masked");
+ return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
+ }
+
+ break;
+ case Instruction::Add:
+ // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
+ // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+ // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+ if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
+ return BinaryOperator::CreateAnd(V, AndRHS);
+ if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
+ return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes
+ break;
+
+ case Instruction::Sub:
+ // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
+ // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+ // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+ if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
+ return BinaryOperator::CreateAnd(V, AndRHS);
+
+ // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
+ // has 1's for all bits that the subtraction with A might affect.
+ if (Op0I->hasOneUse()) {
+ uint32_t BitWidth = AndRHSMask.getBitWidth();
+ uint32_t Zeros = AndRHSMask.countLeadingZeros();
+ APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
+
+ ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
+ if (!(A && A->isZero()) && // avoid infinite recursion.
+ MaskedValueIsZero(Op0LHS, Mask)) {
+ Value *NewNeg = Builder->CreateNeg(Op0RHS);
+ return BinaryOperator::CreateAnd(NewNeg, AndRHS);
+ }
+ }
+ break;
+
+ case Instruction::Shl:
+ case Instruction::LShr:
+ // (1 << x) & 1 --> zext(x == 0)
+ // (1 >> x) & 1 --> zext(x == 0)
+ if (AndRHSMask == 1 && Op0LHS == AndRHS) {
+ Value *NewICmp =
+ Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
+ return new ZExtInst(NewICmp, I.getType());
+ }
+ break;
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
+ if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
+ return Res;
+ } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) {
+ // If this is an integer truncation or change from signed-to-unsigned, and
+ // if the source is an and/or with immediate, transform it. This
+ // frequently occurs for bitfield accesses.
+ if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
+ if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
+ CastOp->getNumOperands() == 2)
+ if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){
+ if (CastOp->getOpcode() == Instruction::And) {
+ // Change: and (cast (and X, C1) to T), C2
+ // into : and (cast X to T), trunc_or_bitcast(C1)&C2
+ // This will fold the two constants together, which may allow
+ // other simplifications.
+ Value *NewCast = Builder->CreateTruncOrBitCast(
+ CastOp->getOperand(0), I.getType(),
+ CastOp->getName()+".shrunk");
+ // trunc_or_bitcast(C1)&C2
+ Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
+ C3 = ConstantExpr::getAnd(C3, AndRHS);
+ return BinaryOperator::CreateAnd(NewCast, C3);
+ } else if (CastOp->getOpcode() == Instruction::Or) {
+ // Change: and (cast (or X, C1) to T), C2
+ // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2
+ Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
+ if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
+ // trunc(C1)&C2
+ return ReplaceInstUsesWith(I, AndRHS);
+ }
+ }
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+
+ // (~A & ~B) == (~(A | B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(Or);
+ }
+
+ {
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ // (A|B) & ~(A&B) -> A^B
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
+
+ // ~(A&B) & (A|B) -> A^B
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
+
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1) { // (A^B)&A -> A&(A^B)
+ I.swapOperands(); // Simplify below
+ std::swap(Op0, Op1);
+ } else if (B == Op1) { // (A^B)&B -> B&(B^A)
+ cast<BinaryOperator>(Op0)->swapOperands();
+ I.swapOperands(); // Simplify below
+ std::swap(Op0, Op1);
+ }
+ }
+
+ if (Op1->hasOneUse() &&
+ match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
+ if (B == Op0) { // B&(A^B) -> B&(B^A)
+ cast<BinaryOperator>(Op1)->swapOperands();
+ std::swap(A, B);
+ }
+ if (A == Op0) // A&(A^B) -> A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
+ }
+
+ // (A&((~A)|B)) -> A&B
+ if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
+ match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
+ return BinaryOperator::CreateAnd(A, Op1);
+ if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
+ match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
+ return BinaryOperator::CreateAnd(A, Op0);
+ }
+
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
+ if (Value *Res = FoldAndOfICmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+ // If and'ing two fcmp, try combine them into one.
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+
+ // fold (and (cast A), (cast B)) -> (cast (and A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
+ SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVectorTy()) {
+ Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+ // Only do this if the casts both really cause code to be generated.
+ if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+ Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+
+ // If this is and(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+ if (Value *Res = FoldAndOfICmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+
+ // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+ if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+ }
+ }
+
+ // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
+ if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
+ if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ SI0->getOperand(1) == SI1->getOperand(1) &&
+ (SI0->hasOneUse() || SI1->hasOneUse())) {
+ Value *NewOp =
+ Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
+ SI0->getName());
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ SI1->getOperand(1));
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+/// CollectBSwapParts - Analyze the specified subexpression and see if it is
+/// capable of providing pieces of a bswap. The subexpression provides pieces
+/// of a bswap if it is proven that each of the non-zero bytes in the output of
+/// the expression came from the corresponding "byte swapped" byte in some other
+/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then
+/// we know that the expression deposits the low byte of %X into the high byte
+/// of the bswap result and that all other bytes are zero. This expression is
+/// accepted, the high byte of ByteValues is set to X to indicate a correct
+/// match.
+///
+/// This function returns true if the match was unsuccessful and false if so.
+/// On entry to the function the "OverallLeftShift" is a signed integer value
+/// indicating the number of bytes that the subexpression is later shifted. For
+/// example, if the expression is later right shifted by 16 bits, the
+/// OverallLeftShift value would be -2 on entry. This is used to specify which
+/// byte of ByteValues is actually being set.
+///
+/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
+/// byte is masked to zero by a user. For example, in (X & 255), X will be
+/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits
+/// this function to working on up to 32-byte (256 bit) values. ByteMask is
+/// always in the local (OverallLeftShift) coordinate space.
+///
+static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
+ SmallVector<Value*, 8> &ByteValues) {
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // If this is an or instruction, it may be an inner node of the bswap.
+ if (I->getOpcode() == Instruction::Or) {
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues) ||
+ CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+
+ // If this is a logical shift by a constant multiple of 8, recurse with
+ // OverallLeftShift and ByteMask adjusted.
+ if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
+ unsigned ShAmt =
+ cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+ // Ensure the shift amount is defined and of a byte value.
+ if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
+ return true;
+
+ unsigned ByteShift = ShAmt >> 3;
+ if (I->getOpcode() == Instruction::Shl) {
+ // X << 2 -> collect(X, +2)
+ OverallLeftShift += ByteShift;
+ ByteMask >>= ByteShift;
+ } else {
+ // X >>u 2 -> collect(X, -2)
+ OverallLeftShift -= ByteShift;
+ ByteMask <<= ByteShift;
+ ByteMask &= (~0U >> (32-ByteValues.size()));
+ }
+
+ if (OverallLeftShift >= (int)ByteValues.size()) return true;
+ if (OverallLeftShift <= -(int)ByteValues.size()) return true;
+
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+
+ // If this is a logical 'and' with a mask that clears bytes, clear the
+ // corresponding bytes in ByteMask.
+ if (I->getOpcode() == Instruction::And &&
+ isa<ConstantInt>(I->getOperand(1))) {
+ // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
+ unsigned NumBytes = ByteValues.size();
+ APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
+ const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
+
+ for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
+ // If this byte is masked out by a later operation, we don't care what
+ // the and mask is.
+ if ((ByteMask & (1 << i)) == 0)
+ continue;
+
+ // If the AndMask is all zeros for this byte, clear the bit.
+ APInt MaskB = AndMask & Byte;
+ if (MaskB == 0) {
+ ByteMask &= ~(1U << i);
+ continue;
+ }
+
+ // If the AndMask is not all ones for this byte, it's not a bytezap.
+ if (MaskB != Byte)
+ return true;
+
+ // Otherwise, this byte is kept.
+ }
+
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+ }
+
+ // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
+ // the input value to the bswap. Some observations: 1) if more than one byte
+ // is demanded from this input, then it could not be successfully assembled
+ // into a byteswap. At least one of the two bytes would not be aligned with
+ // their ultimate destination.
+ if (!isPowerOf2_32(ByteMask)) return true;
+ unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
+
+ // 2) The input and ultimate destinations must line up: if byte 3 of an i32
+ // is demanded, it needs to go into byte 0 of the result. This means that the
+ // byte needs to be shifted until it lands in the right byte bucket. The
+ // shift amount depends on the position: if the byte is coming from the high
+ // part of the value (e.g. byte 3) then it must be shifted right. If from the
+ // low part, it must be shifted left.
+ unsigned DestByteNo = InputByteNo + OverallLeftShift;
+ if (InputByteNo < ByteValues.size()/2) {
+ if (ByteValues.size()-1-DestByteNo != InputByteNo)
+ return true;
+ } else {
+ if (ByteValues.size()-1-DestByteNo != InputByteNo)
+ return true;
+ }
+
+ // If the destination byte value is already defined, the values are or'd
+ // together, which isn't a bswap (unless it's an or of the same bits).
+ if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
+ return true;
+ ByteValues[DestByteNo] = V;
+ return false;
+}
+
+/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
+/// If so, insert the new bswap intrinsic and return it.
+Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
+ const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+ if (!ITy || ITy->getBitWidth() % 16 ||
+ // ByteMask only allows up to 32-byte values.
+ ITy->getBitWidth() > 32*8)
+ return 0; // Can only bswap pairs of bytes. Can't do vectors.
+
+ /// ByteValues - For each byte of the result, we keep track of which value
+ /// defines each byte.
+ SmallVector<Value*, 8> ByteValues;
+ ByteValues.resize(ITy->getBitWidth()/8);
+
+ // Try to find all the pieces corresponding to the bswap.
+ uint32_t ByteMask = ~0U >> (32-ByteValues.size());
+ if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
+ return 0;
+
+ // Check to see if all of the bytes come from the same value.
+ Value *V = ByteValues[0];
+ if (V == 0) return 0; // Didn't find a byte? Must be zero.
+
+ // Check to make sure that all of the bytes come from the same value.
+ for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
+ if (ByteValues[i] != V)
+ return 0;
+ const Type *Tys[] = { ITy };
+ Module *M = I.getParent()->getParent()->getParent();
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+ return CallInst::Create(F, V);
+}
+
+/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check
+/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then
+/// we can simplify this expression to "cond ? C : D or B".
+static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
+ Value *C, Value *D) {
+ // If A is not a select of -1/0, this cannot match.
+ Value *Cond = 0;
+ if (!match(A, m_SExt(m_Value(Cond))) ||
+ !Cond->getType()->isIntegerTy(1))
+ return 0;
+
+ // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
+ if (match(D, m_Not(m_SExt(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, B);
+ if (match(D, m_SExt(m_Not(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, B);
+
+ // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
+ if (match(B, m_Not(m_SExt(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, D);
+ if (match(B, m_SExt(m_Not(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, D);
+ return 0;
+}
+
+/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
+Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+
+ // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
+ if (PredicatesFoldable(LHSCC, RHSCC)) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return getICmpValue(isSigned, Code, Op0, Op1, Builder);
+ }
+ }
+
+ // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
+ Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (LHSCst == 0 || RHSCst == 0) return 0;
+
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp eq (A & C1), 0) | (icmp eq (A & C2), 0) -->
+ // (icmp ne (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
+ Value *Op1 = 0, *Op2 = 0;
+ ConstantInt *CI1 = 0, *CI2 = 0;
+ if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) &&
+ match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) {
+ if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() &&
+ CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) {
+ Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
+ Value *NewAnd = Builder->CreateAnd(Op1, ConstOr);
+ return Builder->CreateICmp(ICmpInst::ICMP_NE, NewAnd, ConstOr);
+ }
+ }
+ }
+ }
+
+ // From here on, we only handle:
+ // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
+ if (Val != Val2) return 0;
+
+ // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+ if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+ RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+ RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ return 0;
+
+ // We can't fold (ugt x, C) | (sgt x, C2).
+ if (!PredicatesFoldable(LHSCC, RHSCC))
+ return 0;
+
+ // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap;
+ if (CmpInst::isSigned(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ CmpInst::isSigned(RHSCC)))
+ ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ else
+ ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+ if (ShouldSwap) {
+ std::swap(LHS, RHS);
+ std::swap(LHSCst, RHSCst);
+ std::swap(LHSCC, RHSCC);
+ }
+
+ // At this point, we know we have two icmp instructions
+ // comparing a value against two constants and or'ing the result
+ // together. Because of the above check, we know that we only have
+ // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the
+ // icmp folding check above), that the two constants are not
+ // equal.
+ assert(LHSCst != RHSCst && "Compares not folded above?");
+
+ switch (LHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ if (LHSCst == SubOne(RHSCst)) {
+ // (X == 13 | X == 14) -> X-13 <u 2
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+ return Builder->CreateICmpULT(Add, AddCST);
+ }
+ break; // (X == 13 | X == 15) -> no change
+ case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
+ case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15
+ case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15
+ return RHS;
+ }
+ break;
+ case ICmpInst::ICMP_NE:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13
+ case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13
+ case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13
+ return LHS;
+ case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
+ case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
+ return ConstantInt::getTrue(LHS->getContext());
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
+ break;
+ case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
+ // If RHSCst is [us]MAXINT, it is always false. Not handling
+ // this can cause overflow.
+ if (RHSCst->isMaxValue(false))
+ return LHS;
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false);
+ case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15
+ return RHS;
+ case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change
+ break;
+ case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2
+ // If RHSCst is [us]MAXINT, it is always false. Not handling
+ // this can cause overflow.
+ if (RHSCst->isMaxValue(true))
+ return LHS;
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false);
+ case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15
+ return RHS;
+ case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13
+ case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13
+ return LHS;
+ case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
+ return ConstantInt::getTrue(LHS->getContext());
+ case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13
+ case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13
+ return LHS;
+ case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
+ case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
+ return ConstantInt::getTrue(LHS->getContext());
+ case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
+/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of
+/// instcombine, this returns a Value which should already be inserted into the
+/// function.
+Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+ if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ RHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // true.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ConstantInt::getTrue(LHS->getContext());
+
+ // Otherwise, no need to compare the two constants, compare the
+ // rest.
+ return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp uno x,x" is "fcmp uno x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
+
+ return 0;
+ }
+
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
+ if (Op0CC == Op1CC)
+ return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
+ if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ if (Op0CC == FCmpInst::FCMP_FALSE)
+ return RHS;
+ if (Op1CC == FCmpInst::FCMP_FALSE)
+ return LHS;
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ if (Op0Ordered == Op1Ordered) {
+ // If both are ordered or unordered, return a new fcmp with
+ // or'ed predicates.
+ return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder);
+ }
+ }
+ return 0;
+}
+
+/// FoldOrWithConstants - This helper function folds:
+///
+/// ((A | B) & C1) | (B & C2)
+///
+/// into:
+///
+/// (A & C1) | B
+///
+/// when the XOR of the two constants is "all ones" (-1).
+Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C) {
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
+ if (!CI1) return 0;
+
+ Value *V1 = 0;
+ ConstantInt *CI2 = 0;
+ if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;
+
+ APInt Xor = CI1->getValue() ^ CI2->getValue();
+ if (!Xor.isAllOnesValue()) return 0;
+
+ if (V1 == A || V1 == B) {
+ Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
+ return BinaryOperator::CreateOr(NewOp, V1);
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitOr(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyOrInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ ConstantInt *C1 = 0; Value *X = 0;
+ // (X & C1) | C2 --> (X | C2) & (C1|C2)
+ // iff (C1 & C2) == 0.
+ if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
+ (RHS->getValue() & C1->getValue()) != 0 &&
+ Op0->hasOneUse()) {
+ Value *Or = Builder->CreateOr(X, RHS);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateAnd(Or,
+ ConstantInt::get(I.getContext(),
+ RHS->getValue() | C1->getValue()));
+ }
+
+ // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
+ if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
+ Op0->hasOneUse()) {
+ Value *Or = Builder->CreateOr(X, RHS);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateXor(Or,
+ ConstantInt::get(I.getContext(),
+ C1->getValue() & ~RHS->getValue()));
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ Value *A = 0, *B = 0;
+ ConstantInt *C1 = 0, *C2 = 0;
+
+ // (A | B) | C and A | (B | C) -> bswap if possible.
+ // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
+ if (match(Op0, m_Or(m_Value(), m_Value())) ||
+ match(Op1, m_Or(m_Value(), m_Value())) ||
+ (match(Op0, m_Shift(m_Value(), m_Value())) &&
+ match(Op1, m_Shift(m_Value(), m_Value())))) {
+ if (Instruction *BSwap = MatchBSwap(I))
+ return BSwap;
+ }
+
+ // (X^C)|Y -> (X|Y)^C iff Y&C == 0
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ MaskedValueIsZero(Op1, C1->getValue())) {
+ Value *NOr = Builder->CreateOr(A, Op1);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr, C1);
+ }
+
+ // Y|(X^C) -> (X|Y)^C iff Y&C == 0
+ if (Op1->hasOneUse() &&
+ match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ MaskedValueIsZero(Op0, C1->getValue())) {
+ Value *NOr = Builder->CreateOr(A, Op0);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr, C1);
+ }
+
+ // (A & C)|(B & D)
+ Value *C = 0, *D = 0;
+ if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
+ match(Op1, m_And(m_Value(B), m_Value(D)))) {
+ Value *V1 = 0, *V2 = 0, *V3 = 0;
+ C1 = dyn_cast<ConstantInt>(C);
+ C2 = dyn_cast<ConstantInt>(D);
+ if (C1 && C2) { // (A & C1)|(B & C2)
+ // If we have: ((V + N) & C1) | (V & C2)
+ // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ // replace with V+N.
+ if (C1->getValue() == ~C2->getValue()) {
+ if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
+ match(A, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
+ return ReplaceInstUsesWith(I, A);
+ if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
+ return ReplaceInstUsesWith(I, A);
+ }
+ // Or commutes, try both ways.
+ if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
+ match(B, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
+ return ReplaceInstUsesWith(I, B);
+ if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
+ return ReplaceInstUsesWith(I, B);
+ }
+ }
+
+ if ((C1->getValue() & C2->getValue()) == 0) {
+ // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
+ // iff (C1&C2) == 0 and (N&~C1) == 0
+ if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
+ ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N)
+ (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V)
+ return BinaryOperator::CreateAnd(A,
+ ConstantInt::get(A->getContext(),
+ C1->getValue()|C2->getValue()));
+ // Or commutes, try both ways.
+ if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
+ ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N)
+ (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V)
+ return BinaryOperator::CreateAnd(B,
+ ConstantInt::get(B->getContext(),
+ C1->getValue()|C2->getValue()));
+
+ // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
+ // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
+ ConstantInt *C3 = 0, *C4 = 0;
+ if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) &&
+ (C3->getValue() & ~C1->getValue()) == 0 &&
+ match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) &&
+ (C4->getValue() & ~C2->getValue()) == 0) {
+ V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield");
+ return BinaryOperator::CreateAnd(V2,
+ ConstantInt::get(B->getContext(),
+ C1->getValue()|C2->getValue()));
+ }
+ }
+ }
+
+ // Check to see if we have any common things being and'ed. If so, find the
+ // terms for V1 & (V2|V3).
+ if (Op0->hasOneUse() || Op1->hasOneUse()) {
+ V1 = 0;
+ if (A == B) // (A & C)|(A & D) == A & (C|D)
+ V1 = A, V2 = C, V3 = D;
+ else if (A == D) // (A & C)|(B & A) == A & (B|C)
+ V1 = A, V2 = B, V3 = C;
+ else if (C == B) // (A & C)|(C & D) == C & (A|D)
+ V1 = C, V2 = A, V3 = D;
+ else if (C == D) // (A & C)|(B & C) == C & (A|B)
+ V1 = C, V2 = A, V3 = B;
+
+ if (V1) {
+ Value *Or = Builder->CreateOr(V2, V3, "tmp");
+ return BinaryOperator::CreateAnd(V1, Or);
+ }
+ }
+
+ // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants.
+ // Don't do this for vector select idioms, the code generator doesn't handle
+ // them well yet.
+ if (!I.getType()->isVectorTy()) {
+ if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C))
+ return Match;
+ }
+
+ // ((A&~B)|(~A&B)) -> A^B
+ if ((match(C, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, D);
+ // ((~B&A)|(~A&B)) -> A^B
+ if ((match(A, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, D);
+ // ((A&~B)|(B&~A)) -> A^B
+ if ((match(C, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, B);
+ // ((~B&A)|(B&~A)) -> A^B
+ if ((match(A, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, B);
+
+ // ((A|B)&1)|(B&-2) -> (A&1) | B
+ if (match(A, m_Or(m_Value(V1), m_Specific(B))) ||
+ match(A, m_Or(m_Specific(B), m_Value(V1)))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C);
+ if (Ret) return Ret;
+ }
+ // (B&-2)|((A|B)&1) -> (A&1) | B
+ if (match(B, m_Or(m_Specific(A), m_Value(V1))) ||
+ match(B, m_Or(m_Value(V1), m_Specific(A)))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D);
+ if (Ret) return Ret;
+ }
+ }
+
+ // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
+ if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
+ if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ SI0->getOperand(1) == SI1->getOperand(1) &&
+ (SI0->hasOneUse() || SI1->hasOneUse())) {
+ Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
+ SI0->getName());
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ SI1->getOperand(1));
+ }
+ }
+
+ // (~A | ~B) == (~(A & B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(And);
+ }
+
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (Value *Res = FoldOrOfICmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+ // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+ // fold (or (cast A), (cast B)) -> (cast (or A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVectorTy()) {
+ Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+ if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
+ // Only do this if the casts both really cause code to be
+ // generated.
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+ Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+
+ // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+ if (Value *Res = FoldOrOfICmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+
+ // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+ if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+Instruction *InstCombiner::visitXor(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (isa<UndefValue>(Op1)) {
+ if (isa<UndefValue>(Op0))
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef
+ }
+
+ // xor X, X = 0
+ if (Op0 == Op1)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ if (I.getType()->isVectorTy())
+ if (isa<ConstantAggregateZero>(Op1))
+ return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
+
+ // Is this a ~ operation?
+ if (Value *NotOp = dyn_castNotVal(&I)) {
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
+ if (Op0I->getOpcode() == Instruction::And ||
+ Op0I->getOpcode() == Instruction::Or) {
+ // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
+ // ~(~X | Y) === (X & ~Y) - De Morgan's Law
+ if (dyn_castNotVal(Op0I->getOperand(1)))
+ Op0I->swapOperands();
+ if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
+ Value *NotY =
+ Builder->CreateNot(Op0I->getOperand(1),
+ Op0I->getOperand(1)->getName()+".not");
+ if (Op0I->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(Op0NotVal, NotY);
+ return BinaryOperator::CreateAnd(Op0NotVal, NotY);
+ }
+
+ // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
+ // ~(X | Y) === (~X & ~Y) - De Morgan's Law
+ if (isFreeToInvert(Op0I->getOperand(0)) &&
+ isFreeToInvert(Op0I->getOperand(1))) {
+ Value *NotX =
+ Builder->CreateNot(Op0I->getOperand(0), "notlhs");
+ Value *NotY =
+ Builder->CreateNot(Op0I->getOperand(1), "notrhs");
+ if (Op0I->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(NotX, NotY);
+ return BinaryOperator::CreateAnd(NotX, NotY);
+ }
+
+ } else if (Op0I->getOpcode() == Instruction::AShr) {
+ // ~(~X >>s Y) --> (X >>s Y)
+ if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0)))
+ return BinaryOperator::CreateAShr(Op0NotVal, Op0I->getOperand(1));
+ }
+ }
+ }
+
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ if (RHS->isOne() && Op0->hasOneUse())
+ // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0))
+ return CmpInst::Create(CI->getOpcode(),
+ CI->getInversePredicate(),
+ CI->getOperand(0), CI->getOperand(1));
+
+ // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
+ if (CI->hasOneUse() && Op0C->hasOneUse()) {
+ Instruction::CastOps Opcode = Op0C->getOpcode();
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+ (RHS == ConstantExpr::getCast(Opcode,
+ ConstantInt::getTrue(I.getContext()),
+ Op0C->getDestTy()))) {
+ CI->setPredicate(CI->getInversePredicate());
+ return CastInst::Create(Opcode, CI, Op0C->getType());
+ }
+ }
+ }
+ }
+
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ // ~(c-X) == X-c-1 == X+(-c-1)
+ if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
+ if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
+ Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
+ Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
+ ConstantInt::get(I.getType(), 1));
+ return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ if (Op0I->getOpcode() == Instruction::Add) {
+ // ~(X-c) --> (-c-1)-X
+ if (RHS->isAllOnesValue()) {
+ Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
+ return BinaryOperator::CreateSub(
+ ConstantExpr::getSub(NegOp0CI,
+ ConstantInt::get(I.getType(), 1)),
+ Op0I->getOperand(0));
+ } else if (RHS->getValue().isSignBit()) {
+ // (X + C) ^ signbit -> (X + C + signbit)
+ Constant *C = ConstantInt::get(I.getContext(),
+ RHS->getValue() + Op0CI->getValue());
+ return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
+
+ }
+ } else if (Op0I->getOpcode() == Instruction::Or) {
+ // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
+ if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
+ Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
+ // Anything in both C1 and C2 is known to be zero, remove it from
+ // NewRHS.
+ Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
+ NewRHS = ConstantExpr::getAnd(NewRHS,
+ ConstantExpr::getNot(CommonBits));
+ Worklist.Add(Op0I);
+ I.setOperand(0, Op0I->getOperand(0));
+ I.setOperand(1, NewRHS);
+ return &I;
+ }
+ }
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1
+ if (X == Op1)
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+ if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1
+ if (X == Op0)
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+
+ BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
+ if (Op1I) {
+ Value *A, *B;
+ if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
+ if (A == Op0) { // B^(B|A) == (A|B)^B
+ Op1I->swapOperands();
+ I.swapOperands();
+ std::swap(Op0, Op1);
+ } else if (B == Op0) { // B^(A|B) == (A|B)^B
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) {
+ return ReplaceInstUsesWith(I, B); // A^(A^B) == B
+ } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
+ return ReplaceInstUsesWith(I, A); // A^(B^A) == B
+ } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
+ Op1I->hasOneUse()){
+ if (A == Op0) { // A^(A&B) -> A^(B&A)
+ Op1I->swapOperands();
+ std::swap(A, B);
+ }
+ if (B == Op0) { // A^(B&A) -> (B&A)^A
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ }
+ }
+
+ BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
+ if (Op0I) {
+ Value *A, *B;
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()) {
+ if (A == Op1) // (B|A)^B == (A|B)^B
+ std::swap(A, B);
+ if (B == Op1) // (A|B)^B == A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
+ } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
+ return ReplaceInstUsesWith(I, B); // (A^B)^A == B
+ } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
+ return ReplaceInstUsesWith(I, A); // (B^A)^A == B
+ } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()){
+ if (A == Op1) // (A&B)^A -> (B&A)^A
+ std::swap(A, B);
+ if (B == Op1 && // (B&A)^A == ~B & A
+ !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
+ return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
+ }
+ }
+ }
+
+ // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts.
+ if (Op0I && Op1I && Op0I->isShift() &&
+ Op0I->getOpcode() == Op1I->getOpcode() &&
+ Op0I->getOperand(1) == Op1I->getOperand(1) &&
+ (Op1I->hasOneUse() || Op1I->hasOneUse())) {
+ Value *NewOp =
+ Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
+ Op0I->getName());
+ return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
+ Op1I->getOperand(1));
+ }
+
+ if (Op0I && Op1I) {
+ Value *A, *B, *C, *D;
+ // (A & B)^(A | B) -> A ^ B
+ if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (A | B)^(A & B) -> A ^ B
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+
+ // (A & B)^(C & D)
+ if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
+ match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+ // (X & Y)^(X & Y) -> (Y^Z) & X
+ Value *X = 0, *Y = 0, *Z = 0;
+ if (A == C)
+ X = A, Y = B, Z = D;
+ else if (A == D)
+ X = A, Y = B, Z = C;
+ else if (B == C)
+ X = B, Y = A, Z = D;
+ else if (B == D)
+ X = B, Y = A, Z = C;
+
+ if (X) {
+ Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
+ return BinaryOperator::CreateAnd(NewOp, X);
+ }
+ }
+ }
+
+ // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return ReplaceInstUsesWith(I,
+ getICmpValue(isSigned, Code, Op0, Op1, Builder));
+ }
+ }
+
+ // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() &&
+ // Only do this if the casts both really cause code to be generated.
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ I.getType())) {
+ Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
+ Op1C->getOperand(0), I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
new file mode 100644
index 0000000..0ebe3b4
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -0,0 +1,1295 @@
+//===- InstCombineCalls.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitCall and visitInvoke functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+using namespace llvm;
+
+/// getPromotedType - Return the specified type promoted as it would be to pass
+/// though a va_arg area.
+static const Type *getPromotedType(const Type *Ty) {
+ if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
+ if (ITy->getBitWidth() < 32)
+ return Type::getInt32Ty(Ty->getContext());
+ }
+ return Ty;
+}
+
+/// EnforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
+///
+static unsigned EnforceKnownAlignment(Value *V,
+ unsigned Align, unsigned PrefAlign) {
+
+ User *U = dyn_cast<User>(V);
+ if (!U) return Align;
+
+ switch (Operator::getOpcode(U)) {
+ default: break;
+ case Instruction::BitCast:
+ return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+ case Instruction::GetElementPtr: {
+ // If all indexes are zero, it is just the alignment of the base pointer.
+ bool AllZeroOperands = true;
+ for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
+ if (!isa<Constant>(*i) ||
+ !cast<Constant>(*i)->isNullValue()) {
+ AllZeroOperands = false;
+ break;
+ }
+
+ if (AllZeroOperands) {
+ // Treat this like a bitcast.
+ return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+ }
+ return Align;
+ }
+ case Instruction::Alloca: {
+ AllocaInst *AI = cast<AllocaInst>(V);
+ // If there is a requested alignment and if this is an alloca, round up.
+ if (AI->getAlignment() >= PrefAlign)
+ return AI->getAlignment();
+ AI->setAlignment(PrefAlign);
+ return PrefAlign;
+ }
+ }
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // If there is a large requested alignment and we can, bump up the alignment
+ // of the global.
+ if (GV->isDeclaration()) return Align;
+
+ if (GV->getAlignment() >= PrefAlign)
+ return GV->getAlignment();
+ // We can only increase the alignment of the global if it has no alignment
+ // specified or if it is not assigned a section. If it is assigned a
+ // section, the global could be densely packed with other objects in the
+ // section, increasing the alignment could cause padding issues.
+ if (!GV->hasSection() || GV->getAlignment() == 0)
+ GV->setAlignment(PrefAlign);
+ return GV->getAlignment();
+ }
+
+ return Align;
+}
+
+/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
+ unsigned PrefAlign) {
+ assert(V->getType()->isPointerTy() &&
+ "GetOrEnforceKnownAlignment expects a pointer!");
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
+ unsigned TrailZ = KnownZero.countTrailingOnes();
+
+ // Avoid trouble with rediculously large TrailZ values, such as
+ // those computed from a null pointer.
+ TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+
+ unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+
+ // LLVM doesn't support alignments larger than this currently.
+ Align = std::min(Align, +Value::MaximumAlignment);
+
+ if (PrefAlign > Align)
+ Align = EnforceKnownAlignment(V, Align, PrefAlign);
+
+ // We don't need to make any adjustment.
+ return Align;
+}
+
+Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
+ unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(0));
+ unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(1));
+ unsigned MinAlign = std::min(DstAlign, SrcAlign);
+ unsigned CopyAlign = MI->getAlignment();
+
+ if (CopyAlign < MinAlign) {
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ MinAlign, false));
+ return MI;
+ }
+
+ // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
+ // load/store.
+ ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
+ if (MemOpLength == 0) return 0;
+
+ // Source and destination pointer types are always "i8*" for intrinsic. See
+ // if the size is something we can handle with a single primitive load/store.
+ // A single load+store correctly handles overlapping memory in the memmove
+ // case.
+ unsigned Size = MemOpLength->getZExtValue();
+ if (Size == 0) return MI; // Delete this mem transfer.
+
+ if (Size > 8 || (Size&(Size-1)))
+ return 0; // If not 1/2/4/8 bytes, exit.
+
+ // Use an integer load+store unless we can find something better.
+ unsigned SrcAddrSp =
+ cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
+ unsigned DstAddrSp =
+ cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
+
+ const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
+ Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
+ Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
+
+ // Memcpy forces the use of i8* for the source and destination. That means
+ // that if you're using memcpy to move one double around, you'll get a cast
+ // from double* to i8*. We'd much rather use a double load+store rather than
+ // an i64 load+store, here because this improves the odds that the source or
+ // dest address will be promotable. See if we can find a better type than the
+ // integer datatype.
+ Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
+ if (StrippedDest != MI->getArgOperand(0)) {
+ const Type *SrcETy = cast<PointerType>(StrippedDest->getType())
+ ->getElementType();
+ if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
+ // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
+ // down through these levels if so.
+ while (!SrcETy->isSingleValueType()) {
+ if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
+ if (STy->getNumElements() == 1)
+ SrcETy = STy->getElementType(0);
+ else
+ break;
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
+ if (ATy->getNumElements() == 1)
+ SrcETy = ATy->getElementType();
+ else
+ break;
+ } else
+ break;
+ }
+
+ if (SrcETy->isSingleValueType()) {
+ NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
+ NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
+ }
+ }
+ }
+
+
+ // If the memcpy/memmove provides better alignment info than we can
+ // infer, use it.
+ SrcAlign = std::max(SrcAlign, CopyAlign);
+ DstAlign = std::max(DstAlign, CopyAlign);
+
+ Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
+ Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
+ Instruction *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign);
+ InsertNewInstBefore(L, *MI);
+ InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign),
+ *MI);
+
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
+ return MI;
+}
+
+Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
+ unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
+ if (MI->getAlignment() < Alignment) {
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ Alignment, false));
+ return MI;
+ }
+
+ // Extract the length and alignment and fill if they are constant.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
+ ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
+ if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
+ return 0;
+ uint64_t Len = LenC->getZExtValue();
+ Alignment = MI->getAlignment();
+
+ // If the length is zero, this is a no-op
+ if (Len == 0) return MI; // memset(d,c,0,a) -> noop
+
+ // memset(s,c,n) -> store s, c (for n=1,2,4,8)
+ if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
+ const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
+
+ Value *Dest = MI->getDest();
+ Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));
+
+ // Alignment 0 is identity for alignment 1 for memset, but not store.
+ if (Alignment == 0) Alignment = 1;
+
+ // Extract the fill value and store.
+ uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
+ InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill),
+ Dest, false, Alignment), *MI);
+
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(LenC->getType()));
+ return MI;
+ }
+
+ return 0;
+}
+
+/// visitCallInst - CallInst simplification. This mostly only handles folding
+/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
+/// the heavy lifting.
+///
+Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+ if (isFreeCall(&CI))
+ return visitFree(CI);
+ if (isMalloc(&CI))
+ return visitMalloc(CI);
+
+ // If the caller function is nounwind, mark the call as nounwind, even if the
+ // callee isn't.
+ if (CI.getParent()->getParent()->doesNotThrow() &&
+ !CI.doesNotThrow()) {
+ CI.setDoesNotThrow();
+ return &CI;
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
+ if (!II) return visitCallSite(&CI);
+
+ // Intrinsics cannot occur in an invoke, so handle them here instead of in
+ // visitCallSite.
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
+ bool Changed = false;
+
+ // memmove/cpy/set of zero bytes is a noop.
+ if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
+ if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
+ if (CI->getZExtValue() == 1) {
+ // Replace the instruction with just byte operations. We would
+ // transform other cases to loads/stores, but we don't know if
+ // alignment is sufficient.
+ }
+ }
+
+ // If we have a memmove and the source operation is a constant global,
+ // then the source and dest pointers can't alias, so we can change this
+ // into a call to memcpy.
+ if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
+ if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
+ if (GVSrc->isConstant()) {
+ Module *M = CI.getParent()->getParent()->getParent();
+ Intrinsic::ID MemCpyID = Intrinsic::memcpy;
+ const Type *Tys[3] = { CI.getArgOperand(0)->getType(),
+ CI.getArgOperand(1)->getType(),
+ CI.getArgOperand(2)->getType() };
+ CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
+ Changed = true;
+ }
+ }
+
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+ // memmove(x,x,size) -> noop.
+ if (MTI->getSource() == MTI->getDest())
+ return EraseInstFromFunction(CI);
+ }
+
+ // If we can determine a pointer alignment that is bigger than currently
+ // set, update the alignment.
+ if (isa<MemTransferInst>(MI)) {
+ if (Instruction *I = SimplifyMemTransfer(MI))
+ return I;
+ } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
+ if (Instruction *I = SimplifyMemSet(MSI))
+ return I;
+ }
+
+ if (Changed) return II;
+ }
+
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::objectsize: {
+ // We need target data for just about everything so depend on it.
+ if (!TD) break;
+
+ const Type *ReturnTy = CI.getType();
+ bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
+
+ // Get to the real allocated thing and offset as fast as possible.
+ Value *Op1 = II->getArgOperand(0)->stripPointerCasts();
+
+ // If we've stripped down to a single global variable that we
+ // can know the size of then just return that.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) {
+ if (GV->hasDefinitiveInitializer()) {
+ Constant *C = GV->getInitializer();
+ uint64_t GlobalSize = TD->getTypeAllocSize(C->getType());
+ return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, GlobalSize));
+ } else {
+ // Can't determine size of the GV.
+ Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+ return ReplaceInstUsesWith(CI, RetVal);
+ }
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
+ // Get alloca size.
+ if (AI->getAllocatedType()->isSized()) {
+ uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
+ if (AI->isArrayAllocation()) {
+ const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!C) break;
+ AllocaSize *= C->getZExtValue();
+ }
+ return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, AllocaSize));
+ }
+ } else if (CallInst *MI = extractMallocCall(Op1)) {
+ const Type* MallocType = getMallocAllocatedType(MI);
+ // Get alloca size.
+ if (MallocType && MallocType->isSized()) {
+ if (Value *NElems = getMallocArraySize(MI, TD, true)) {
+ if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
+ return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy,
+ (NElements->getZExtValue() * TD->getTypeAllocSize(MallocType))));
+ }
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) {
+ // Only handle constant GEPs here.
+ if (CE->getOpcode() != Instruction::GetElementPtr) break;
+ GEPOperator *GEP = cast<GEPOperator>(CE);
+
+ // Make sure we're not a constant offset from an external
+ // global.
+ Value *Operand = GEP->getPointerOperand();
+ Operand = Operand->stripPointerCasts();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand))
+ if (!GV->hasDefinitiveInitializer()) break;
+
+ // Get what we're pointing to and its size.
+ const PointerType *BaseType =
+ cast<PointerType>(Operand->getType());
+ uint64_t Size = TD->getTypeAllocSize(BaseType->getElementType());
+
+ // Get the current byte offset into the thing. Use the original
+ // operand in case we're looking through a bitcast.
+ SmallVector<Value*, 8> Ops(CE->op_begin()+1, CE->op_end());
+ const PointerType *OffsetType =
+ cast<PointerType>(GEP->getPointerOperand()->getType());
+ uint64_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size());
+
+ if (Size < Offset) {
+ // Out of bound reference? Negative index normalized to large
+ // index? Just return "I don't know".
+ Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+ return ReplaceInstUsesWith(CI, RetVal);
+ }
+
+ Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset);
+ return ReplaceInstUsesWith(CI, RetVal);
+ }
+
+ // Do not return "I don't know" here. Later optimization passes could
+ // make it possible to evaluate objectsize to a constant.
+ break;
+ }
+ case Intrinsic::bswap:
+ // bswap(bswap(x)) -> x
+ if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
+ if (Operand->getIntrinsicID() == Intrinsic::bswap)
+ return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
+
+ // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
+ if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
+ if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
+ if (Operand->getIntrinsicID() == Intrinsic::bswap) {
+ unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
+ TI->getType()->getPrimitiveSizeInBits();
+ Value *CV = ConstantInt::get(Operand->getType(), C);
+ Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV);
+ return new TruncInst(V, TI->getType());
+ }
+ }
+
+ break;
+ case Intrinsic::powi:
+ if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // powi(x, 0) -> 1.0
+ if (Power->isZero())
+ return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
+ // powi(x, 1) -> x
+ if (Power->isOne())
+ return ReplaceInstUsesWith(CI, II->getArgOperand(0));
+ // powi(x, -1) -> 1/x
+ if (Power->isAllOnesValue())
+ return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
+ II->getArgOperand(0));
+ }
+ break;
+ case Intrinsic::cttz: {
+ // If all bits below the first known one are known zero,
+ // this value is constant.
+ const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
+ KnownZero, KnownOne);
+ unsigned TrailingZeros = KnownOne.countTrailingZeros();
+ APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
+ if ((Mask & KnownZero) == Mask)
+ return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
+ APInt(BitWidth, TrailingZeros)));
+
+ }
+ break;
+ case Intrinsic::ctlz: {
+ // If all bits above the first known one are known zero,
+ // this value is constant.
+ const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
+ KnownZero, KnownOne);
+ unsigned LeadingZeros = KnownOne.countLeadingZeros();
+ APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
+ if ((Mask & KnownZero) == Mask)
+ return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
+ APInt(BitWidth, LeadingZeros)));
+
+ }
+ break;
+ case Intrinsic::uadd_with_overflow: {
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt Mask = APInt::getSignBit(BitWidth);
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
+ bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
+
+ if (LHSKnownNegative || LHSKnownPositive) {
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+ bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
+ bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
+ if (LHSKnownNegative && RHSKnownNegative) {
+ // The sign bit is set in both cases: this MUST overflow.
+ // Create a simple add instruction, and insert it into the struct.
+ Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI);
+ Worklist.Add(Add);
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()),ConstantInt::getTrue(II->getContext())
+ };
+ Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ return InsertValueInst::Create(Struct, Add, 0);
+ }
+
+ if (LHSKnownPositive && RHSKnownPositive) {
+ // The sign bit is clear in both cases: this CANNOT overflow.
+ // Create a simple add instruction, and insert it into the struct.
+ Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI);
+ Worklist.Add(Add);
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ return InsertValueInst::Create(Struct, Add, 0);
+ }
+ }
+ }
+ // FALL THROUGH uadd into sadd
+ case Intrinsic::sadd_with_overflow:
+ // Canonicalize constants into the RHS.
+ if (isa<Constant>(II->getArgOperand(0)) &&
+ !isa<Constant>(II->getArgOperand(1))) {
+ Value *LHS = II->getArgOperand(0);
+ II->setArgOperand(0, II->getArgOperand(1));
+ II->setArgOperand(1, LHS);
+ return II;
+ }
+
+ // X + undef -> undef
+ if (isa<UndefValue>(II->getArgOperand(1)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // X + 0 -> {X, false}
+ if (RHS->isZero()) {
+ Constant *V[] = {
+ UndefValue::get(II->getArgOperand(0)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
+ }
+ }
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ // undef - X -> undef
+ // X - undef -> undef
+ if (isa<UndefValue>(II->getArgOperand(0)) ||
+ isa<UndefValue>(II->getArgOperand(1)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // X - 0 -> {X, false}
+ if (RHS->isZero()) {
+ Constant *V[] = {
+ UndefValue::get(II->getArgOperand(0)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
+ }
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ // Canonicalize constants into the RHS.
+ if (isa<Constant>(II->getArgOperand(0)) &&
+ !isa<Constant>(II->getArgOperand(1))) {
+ Value *LHS = II->getArgOperand(0);
+ II->setArgOperand(0, II->getArgOperand(1));
+ II->setArgOperand(1, LHS);
+ return II;
+ }
+
+ // X * undef -> undef
+ if (isa<UndefValue>(II->getArgOperand(1)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ // X*0 -> {0, false}
+ if (RHSI->isZero())
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
+
+ // X * 1 -> {X, false}
+ if (RHSI->equalsInt(1)) {
+ Constant *V[] = {
+ UndefValue::get(II->getArgOperand(0)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
+ }
+ }
+ break;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::x86_sse_loadu_ps:
+ case Intrinsic::x86_sse2_loadu_pd:
+ case Intrinsic::x86_sse2_loadu_dq:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ // Turn X86 loadups -> load if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getArgOperand(1), 16) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr);
+ }
+ break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ // Turn X86 storeu -> store if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(1)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
+ return new StoreInst(II->getArgOperand(1), Ptr);
+ }
+ break;
+
+ case Intrinsic::x86_sse_cvttss2si: {
+ // These intrinsics only demands the 0th element of its input vector. If
+ // we can simplify the input based on that, do so now.
+ unsigned VWidth =
+ cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
+ APInt DemandedElts(VWidth, 1);
+ APInt UndefElts(VWidth, 0);
+ if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
+ DemandedElts, UndefElts)) {
+ II->setArgOperand(0, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
+ assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ if (!isa<ConstantInt>(Mask->getOperand(i)) &&
+ !isa<UndefValue>(Mask->getOperand(i))) {
+ AllEltsOk = false;
+ break;
+ }
+ }
+
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
+ Mask->getType());
+ Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
+ Mask->getType());
+ Value *Result = UndefValue::get(Op0->getType());
+
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
+
+ for (unsigned i = 0; i != 16; ++i) {
+ if (isa<UndefValue>(Mask->getOperand(i)))
+ continue;
+ unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+ Idx &= 31; // Match the hardware behavior.
+
+ if (ExtractedElts[Idx] == 0) {
+ ExtractedElts[Idx] =
+ Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ Idx&15, false), "tmp");
+ }
+
+ // Insert this value into the result vector.
+ Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ i, false), "tmp");
+ }
+ return CastInst::Create(Instruction::BitCast, Result, CI.getType());
+ }
+ }
+ break;
+
+ case Intrinsic::stackrestore: {
+ // If the save is right next to the restore, remove the restore. This can
+ // happen when variable allocas are DCE'd.
+ if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
+ if (SS->getIntrinsicID() == Intrinsic::stacksave) {
+ BasicBlock::iterator BI = SS;
+ if (&*++BI == II)
+ return EraseInstFromFunction(CI);
+ }
+ }
+
+ // Scan down this block to see if there is another stack restore in the
+ // same block without an intervening call/alloca.
+ BasicBlock::iterator BI = II;
+ TerminatorInst *TI = II->getParent()->getTerminator();
+ bool CannotRemove = false;
+ for (++BI; &*BI != TI; ++BI) {
+ if (isa<AllocaInst>(BI) || isMalloc(BI)) {
+ CannotRemove = true;
+ break;
+ }
+ if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+ // If there is a stackrestore below this one, remove this one.
+ if (II->getIntrinsicID() == Intrinsic::stackrestore)
+ return EraseInstFromFunction(CI);
+ // Otherwise, ignore the intrinsic.
+ } else {
+ // If we found a non-intrinsic call, we can't remove the stack
+ // restore.
+ CannotRemove = true;
+ break;
+ }
+ }
+ }
+
+ // If the stack restore is in a return/unwind block and if there are no
+ // allocas or calls between the restore and the return, nuke the restore.
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
+ return EraseInstFromFunction(CI);
+ break;
+ }
+ }
+
+ return visitCallSite(II);
+}
+
+// InvokeInst simplification
+//
+Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
+ return visitCallSite(&II);
+}
+
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value
+/// passed through the varargs area, we can eliminate the use of the cast.
+static bool isSafeToEliminateVarargsCast(const CallSite CS,
+ const CastInst * const CI,
+ const TargetData * const TD,
+ const int ix) {
+ if (!CI->isLosslessCast())
+ return false;
+
+ // The size of ByVal arguments is derived from the type, so we
+ // can't change to a type with a different size. If the size were
+ // passed explicitly we could avoid this check.
+ if (!CS.paramHasAttr(ix, Attribute::ByVal))
+ return true;
+
+ const Type* SrcTy =
+ cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
+ const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+ if (!SrcTy->isSized() || !DstTy->isSized())
+ return false;
+ if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
+ return false;
+ return true;
+}
+
+namespace {
+class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls {
+ InstCombiner *IC;
+protected:
+ void replaceCall(Value *With) {
+ NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
+ }
+ bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
+ if (SizeCI->isAllOnesValue())
+ return true;
+ if (isString)
+ return SizeCI->getZExtValue() >=
+ GetStringLength(CI->getArgOperand(SizeArgOp));
+ if (ConstantInt *Arg = dyn_cast<ConstantInt>(
+ CI->getArgOperand(SizeArgOp)))
+ return SizeCI->getZExtValue() >= Arg->getZExtValue();
+ }
+ return false;
+ }
+public:
+ InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { }
+ Instruction *NewInstruction;
+};
+} // end anonymous namespace
+
+// Try to fold some different type of calls here.
+// Currently we're only working with the checking functions, memcpy_chk,
+// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
+// strcat_chk and strncat_chk.
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
+ if (CI->getCalledFunction() == 0) return 0;
+
+ InstCombineFortifiedLibCalls Simplifier(this);
+ Simplifier.fold(CI, TD);
+ return Simplifier.NewInstruction;
+}
+
+// visitCallSite - Improvements for call and invoke instructions.
+//
+Instruction *InstCombiner::visitCallSite(CallSite CS) {
+ bool Changed = false;
+
+ // If the callee is a constexpr cast of a function, attempt to move the cast
+ // to the arguments of the call/invoke.
+ if (transformConstExprCastCall(CS)) return 0;
+
+ Value *Callee = CS.getCalledValue();
+
+ if (Function *CalleeF = dyn_cast<Function>(Callee))
+ // If the call and callee calling conventions don't match, this call must
+ // be unreachable, as the call is undefined.
+ if (CalleeF->getCallingConv() != CS.getCallingConv() &&
+ // Only do this for calls to a function with a body. A prototype may
+ // not actually end up matching the implementation's calling conv for a
+ // variety of reasons (e.g. it may be written in assembly).
+ !CalleeF->isDeclaration()) {
+ Instruction *OldCall = CS.getInstruction();
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ OldCall);
+ // If OldCall dues not return void then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust itself.
+ if (!OldCall->getType()->isVoidTy())
+ OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
+ if (isa<CallInst>(OldCall))
+ return EraseInstFromFunction(*OldCall);
+
+ // We cannot remove an invoke, because it would change the CFG, just
+ // change the callee to a null pointer.
+ cast<InvokeInst>(OldCall)->setCalledFunction(
+ Constant::getNullValue(CalleeF->getType()));
+ return 0;
+ }
+
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ // This instruction is not reachable, just remove it. We insert a store to
+ // undef so that we know that this code is not reachable, despite the fact
+ // that we can't modify the CFG here.
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ CS.getInstruction());
+
+ // If CS does not return void then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust itself.
+ if (!CS.getInstruction()->getType()->isVoidTy())
+ CS.getInstruction()->
+ replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
+
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ // Don't break the CFG, insert a dummy cond branch.
+ BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
+ ConstantInt::getTrue(Callee->getContext()), II);
+ }
+ return EraseInstFromFunction(*CS.getInstruction());
+ }
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
+ if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
+ if (In->getIntrinsicID() == Intrinsic::init_trampoline)
+ return transformCallThroughTrampoline(CS);
+
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ if (FTy->isVarArg()) {
+ int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
+ // See if we can optimize any arguments passed through the varargs area of
+ // the call.
+ for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
+ E = CS.arg_end(); I != E; ++I, ++ix) {
+ CastInst *CI = dyn_cast<CastInst>(*I);
+ if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
+ *I = CI->getOperand(0);
+ Changed = true;
+ }
+ }
+ }
+
+ if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
+ // Inline asm calls cannot throw - mark them 'nounwind'.
+ CS.setDoesNotThrow();
+ Changed = true;
+ }
+
+ // Try to optimize the call if possible, we require TargetData for most of
+ // this. None of these calls are seen as possibly dead so go ahead and
+ // delete the instruction now.
+ if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
+ Instruction *I = tryOptimizeCall(CI, TD);
+ // If we changed something return the result, etc. Otherwise let
+ // the fallthrough check.
+ if (I) return EraseInstFromFunction(*I);
+ }
+
+ return Changed ? CS.getInstruction() : 0;
+}
+
+// transformConstExprCastCall - If the callee is a constexpr cast of a function,
+// attempt to move the cast to the arguments of the call/invoke.
+//
+bool InstCombiner::transformConstExprCastCall(CallSite CS) {
+ if (!isa<ConstantExpr>(CS.getCalledValue())) return false;
+ ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue());
+ if (CE->getOpcode() != Instruction::BitCast ||
+ !isa<Function>(CE->getOperand(0)))
+ return false;
+ Function *Callee = cast<Function>(CE->getOperand(0));
+ Instruction *Caller = CS.getInstruction();
+ const AttrListPtr &CallerPAL = CS.getAttributes();
+
+ // Okay, this is a cast from a function to a different type. Unless doing so
+ // would cause a type conversion of one of our arguments, change this call to
+ // be a direct call with arguments casted to the appropriate types.
+ //
+ const FunctionType *FT = Callee->getFunctionType();
+ const Type *OldRetTy = Caller->getType();
+ const Type *NewRetTy = FT->getReturnType();
+
+ if (NewRetTy->isStructTy())
+ return false; // TODO: Handle multiple return values.
+
+ // Check to see if we are changing the return type...
+ if (OldRetTy != NewRetTy) {
+ if (Callee->isDeclaration() &&
+ // Conversion is ok if changing from one pointer type to another or from
+ // a pointer to an integer of the same size.
+ !((OldRetTy->isPointerTy() || !TD ||
+ OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
+ (NewRetTy->isPointerTy() || !TD ||
+ NewRetTy == TD->getIntPtrType(Caller->getContext()))))
+ return false; // Cannot transform this return value.
+
+ if (!Caller->use_empty() &&
+ // void -> non-void is handled specially
+ !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
+ return false; // Cannot transform this return value.
+
+ if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
+ Attributes RAttrs = CallerPAL.getRetAttributes();
+ if (RAttrs & Attribute::typeIncompatible(NewRetTy))
+ return false; // Attribute not compatible with transformed value.
+ }
+
+ // If the callsite is an invoke instruction, and the return value is used by
+ // a PHI node in a successor, we cannot change the return type of the call
+ // because there is no place to put the cast instruction (without breaking
+ // the critical edge). Bail out in this case.
+ if (!Caller->use_empty())
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
+ for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+ UI != E; ++UI)
+ if (PHINode *PN = dyn_cast<PHINode>(*UI))
+ if (PN->getParent() == II->getNormalDest() ||
+ PN->getParent() == II->getUnwindDest())
+ return false;
+ }
+
+ unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+ unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
+
+ CallSite::arg_iterator AI = CS.arg_begin();
+ for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
+ const Type *ParamTy = FT->getParamType(i);
+ const Type *ActTy = (*AI)->getType();
+
+ if (!CastInst::isCastable(ActTy, ParamTy))
+ return false; // Cannot transform this parameter value.
+
+ if (CallerPAL.getParamAttributes(i + 1)
+ & Attribute::typeIncompatible(ParamTy))
+ return false; // Attribute not compatible with transformed value.
+
+ // Converting from one pointer type to another or between a pointer and an
+ // integer of the same size is safe even if we do not have a body.
+ bool isConvertible = ActTy == ParamTy ||
+ (TD && ((ParamTy->isPointerTy() ||
+ ParamTy == TD->getIntPtrType(Caller->getContext())) &&
+ (ActTy->isPointerTy() ||
+ ActTy == TD->getIntPtrType(Caller->getContext()))));
+ if (Callee->isDeclaration() && !isConvertible) return false;
+ }
+
+ if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
+ Callee->isDeclaration())
+ return false; // Do not delete arguments unless we have a function body.
+
+ if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
+ !CallerPAL.isEmpty())
+ // In this case we have more arguments than the new function type, but we
+ // won't be dropping them. Check that these extra arguments have attributes
+ // that are compatible with being a vararg call argument.
+ for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
+ if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
+ break;
+ Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
+ if (PAttrs & Attribute::VarArgsIncompatible)
+ return false;
+ }
+
+ // Okay, we decided that this is a safe thing to do: go ahead and start
+ // inserting cast instructions as necessary...
+ std::vector<Value*> Args;
+ Args.reserve(NumActualArgs);
+ SmallVector<AttributeWithIndex, 8> attrVec;
+ attrVec.reserve(NumCommonArgs);
+
+ // Get any return attributes.
+ Attributes RAttrs = CallerPAL.getRetAttributes();
+
+ // If the return value is not being used, the type may not be compatible
+ // with the existing attributes. Wipe out any problematic attributes.
+ RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
+
+ // Add the new return attributes.
+ if (RAttrs)
+ attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
+
+ AI = CS.arg_begin();
+ for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
+ const Type *ParamTy = FT->getParamType(i);
+ if ((*AI)->getType() == ParamTy) {
+ Args.push_back(*AI);
+ } else {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
+ false, ParamTy, false);
+ Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
+ }
+
+ // Add any parameter attributes.
+ if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ }
+
+ // If the function takes more arguments than the call was taking, add them
+ // now.
+ for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
+ Args.push_back(Constant::getNullValue(FT->getParamType(i)));
+
+ // If we are removing arguments to the function, emit an obnoxious warning.
+ if (FT->getNumParams() < NumActualArgs) {
+ if (!FT->isVarArg()) {
+ errs() << "WARNING: While resolving call to function '"
+ << Callee->getName() << "' arguments were dropped!\n";
+ } else {
+ // Add all of the arguments in their promoted form to the arg list.
+ for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
+ const Type *PTy = getPromotedType((*AI)->getType());
+ if (PTy != (*AI)->getType()) {
+ // Must promote to pass through va_arg area!
+ Instruction::CastOps opcode =
+ CastInst::getCastOpcode(*AI, false, PTy, false);
+ Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
+ } else {
+ Args.push_back(*AI);
+ }
+
+ // Add any parameter attributes.
+ if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ }
+ }
+ }
+
+ if (Attributes FnAttrs = CallerPAL.getFnAttributes())
+ attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ if (NewRetTy->isVoidTy())
+ Caller->setName(""); // Void type should not have a name.
+
+ const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
+ attrVec.end());
+
+ Instruction *NC;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
+ Args.begin(), Args.end(),
+ Caller->getName(), Caller);
+ cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
+ } else {
+ NC = CallInst::Create(Callee, Args.begin(), Args.end(),
+ Caller->getName(), Caller);
+ CallInst *CI = cast<CallInst>(Caller);
+ if (CI->isTailCall())
+ cast<CallInst>(NC)->setTailCall();
+ cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
+ cast<CallInst>(NC)->setAttributes(NewCallerPAL);
+ }
+
+ // Insert a cast of the return type as necessary.
+ Value *NV = NC;
+ if (OldRetTy != NV->getType() && !Caller->use_empty()) {
+ if (!NV->getType()->isVoidTy()) {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
+ OldRetTy, false);
+ NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
+
+ // If this is an invoke instruction, we should insert it after the first
+ // non-phi, instruction in the normal successor block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
+ InsertNewInstBefore(NC, *I);
+ } else {
+ // Otherwise, it's a call, just insert cast right after the call instr
+ InsertNewInstBefore(NC, *Caller);
+ }
+ Worklist.AddUsersToWorkList(*Caller);
+ } else {
+ NV = UndefValue::get(Caller->getType());
+ }
+ }
+
+
+ if (!Caller->use_empty())
+ Caller->replaceAllUsesWith(NV);
+
+ EraseInstFromFunction(*Caller);
+ return true;
+}
+
+// transformCallThroughTrampoline - Turn a call to a function created by the
+// init_trampoline intrinsic into a direct call to the underlying function.
+//
+Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
+ Value *Callee = CS.getCalledValue();
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ const AttrListPtr &Attrs = CS.getAttributes();
+
+ // If the call already has the 'nest' attribute somewhere then give up -
+ // otherwise 'nest' would occur twice after splicing in the chain.
+ if (Attrs.hasAttrSomewhere(Attribute::Nest))
+ return 0;
+
+ IntrinsicInst *Tramp =
+ cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
+
+ Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
+ const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
+ const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
+
+ const AttrListPtr &NestAttrs = NestF->getAttributes();
+ if (!NestAttrs.isEmpty()) {
+ unsigned NestIdx = 1;
+ const Type *NestTy = 0;
+ Attributes NestAttr = Attribute::None;
+
+ // Look for a parameter marked with the 'nest' attribute.
+ for (FunctionType::param_iterator I = NestFTy->param_begin(),
+ E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
+ if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
+ // Record the parameter type and any other attributes.
+ NestTy = *I;
+ NestAttr = NestAttrs.getParamAttributes(NestIdx);
+ break;
+ }
+
+ if (NestTy) {
+ Instruction *Caller = CS.getInstruction();
+ std::vector<Value*> NewArgs;
+ NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
+
+ SmallVector<AttributeWithIndex, 8> NewAttrs;
+ NewAttrs.reserve(Attrs.getNumSlots() + 1);
+
+ // Insert the nest argument into the call argument list, which may
+ // mean appending it. Likewise for attributes.
+
+ // Add any result attributes.
+ if (Attributes Attr = Attrs.getRetAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
+
+ {
+ unsigned Idx = 1;
+ CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ do {
+ if (Idx == NestIdx) {
+ // Add the chain argument and attributes.
+ Value *NestVal = Tramp->getArgOperand(2);
+ if (NestVal->getType() != NestTy)
+ NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
+ NewArgs.push_back(NestVal);
+ NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
+ }
+
+ if (I == E)
+ break;
+
+ // Add the original argument and attributes.
+ NewArgs.push_back(*I);
+ if (Attributes Attr = Attrs.getParamAttributes(Idx))
+ NewAttrs.push_back
+ (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
+
+ ++Idx, ++I;
+ } while (1);
+ }
+
+ // Add any function attributes.
+ if (Attributes Attr = Attrs.getFnAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
+
+ // The trampoline may have been bitcast to a bogus type (FTy).
+ // Handle this by synthesizing a new function type, equal to FTy
+ // with the chain parameter inserted.
+
+ std::vector<const Type*> NewTypes;
+ NewTypes.reserve(FTy->getNumParams()+1);
+
+ // Insert the chain's type into the list of parameter types, which may
+ // mean appending it.
+ {
+ unsigned Idx = 1;
+ FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end();
+
+ do {
+ if (Idx == NestIdx)
+ // Add the chain's type.
+ NewTypes.push_back(NestTy);
+
+ if (I == E)
+ break;
+
+ // Add the original type.
+ NewTypes.push_back(*I);
+
+ ++Idx, ++I;
+ } while (1);
+ }
+
+ // Replace the trampoline call with a direct call. Let the generic
+ // code sort out any function type mismatches.
+ FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
+ FTy->isVarArg());
+ Constant *NewCallee =
+ NestF->getType() == PointerType::getUnqual(NewFTy) ?
+ NestF : ConstantExpr::getBitCast(NestF,
+ PointerType::getUnqual(NewFTy));
+ const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
+ NewAttrs.end());
+
+ Instruction *NewCaller;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NewCaller = InvokeInst::Create(NewCallee,
+ II->getNormalDest(), II->getUnwindDest(),
+ NewArgs.begin(), NewArgs.end(),
+ Caller->getName(), Caller);
+ cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
+ } else {
+ NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
+ Caller->getName(), Caller);
+ if (cast<CallInst>(Caller)->isTailCall())
+ cast<CallInst>(NewCaller)->setTailCall();
+ cast<CallInst>(NewCaller)->
+ setCallingConv(cast<CallInst>(Caller)->getCallingConv());
+ cast<CallInst>(NewCaller)->setAttributes(NewPAL);
+ }
+ if (!Caller->getType()->isVoidTy())
+ Caller->replaceAllUsesWith(NewCaller);
+ Caller->eraseFromParent();
+ Worklist.Remove(Caller);
+ return 0;
+ }
+ }
+
+ // Replace the trampoline call with a direct call. Since there is no 'nest'
+ // parameter, there is no need to adjust the argument list. Let the generic
+ // code sort out any function type mismatches.
+ Constant *NewCallee =
+ NestF->getType() == PTy ? NestF :
+ ConstantExpr::getBitCast(NestF, PTy);
+ CS.setCalledFunction(NewCallee);
+ return CS.getInstruction();
+}
+
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
new file mode 100644
index 0000000..79a9b09
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -0,0 +1,1686 @@
+//===- InstCombineCasts.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for cast operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
+/// expression. If so, decompose it, returning some value X, such that Val is
+/// X*Scale+Offset.
+///
+static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
+ uint64_t &Offset) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ Offset = CI->getZExtValue();
+ Scale = 0;
+ return ConstantInt::get(Val->getType(), 0);
+ }
+
+ if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (I->getOpcode() == Instruction::Shl) {
+ // This is a value scaled by '1 << the shift amt'.
+ Scale = UINT64_C(1) << RHS->getZExtValue();
+ Offset = 0;
+ return I->getOperand(0);
+ }
+
+ if (I->getOpcode() == Instruction::Mul) {
+ // This value is scaled by 'RHS'.
+ Scale = RHS->getZExtValue();
+ Offset = 0;
+ return I->getOperand(0);
+ }
+
+ if (I->getOpcode() == Instruction::Add) {
+ // We have X+C. Check to see if we really have (X*C2)+C1,
+ // where C1 is divisible by C2.
+ unsigned SubScale;
+ Value *SubVal =
+ DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+ Offset += RHS->getZExtValue();
+ Scale = SubScale;
+ return SubVal;
+ }
+ }
+ }
+
+ // Otherwise, we can't look past this.
+ Scale = 1;
+ Offset = 0;
+ return Val;
+}
+
+/// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
+/// try to eliminate the cast by moving the type information into the alloc.
+Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
+ AllocaInst &AI) {
+ // This requires TargetData to get the alloca alignment and size information.
+ if (!TD) return 0;
+
+ const PointerType *PTy = cast<PointerType>(CI.getType());
+
+ BuilderTy AllocaBuilder(*Builder);
+ AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
+
+ // Get the type really allocated and the type casted to.
+ const Type *AllocElTy = AI.getAllocatedType();
+ const Type *CastElTy = PTy->getElementType();
+ if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
+
+ unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy);
+ unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy);
+ if (CastElTyAlign < AllocElTyAlign) return 0;
+
+ // If the allocation has multiple uses, only promote it if we are strictly
+ // increasing the alignment of the resultant allocation. If we keep it the
+ // same, we open the door to infinite loops of various kinds. (A reference
+ // from a dbg.declare doesn't count as a use for this purpose.)
+ if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) &&
+ CastElTyAlign == AllocElTyAlign) return 0;
+
+ uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
+ uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
+ if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+
+ // See if we can satisfy the modulus by pulling a scale out of the array
+ // size argument.
+ unsigned ArraySizeScale;
+ uint64_t ArrayOffset;
+ Value *NumElements = // See if the array size is a decomposable linear expr.
+ DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
+
+ // If we can now satisfy the modulus, by using a non-1 scale, we really can
+ // do the xform.
+ if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
+ (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0;
+
+ unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
+ Value *Amt = 0;
+ if (Scale == 1) {
+ Amt = NumElements;
+ } else {
+ Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
+ // Insert before the alloca, not before the cast.
+ Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
+ }
+
+ if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
+ Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
+ Offset, true);
+ Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
+ }
+
+ AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
+ New->setAlignment(AI.getAlignment());
+ New->takeName(&AI);
+
+ // If the allocation has one real use plus a dbg.declare, just remove the
+ // declare.
+ if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) {
+ EraseInstFromFunction(*(Instruction*)DI);
+ }
+ // If the allocation has multiple real uses, insert a cast and change all
+ // things that used it to use the new cast. This will also hack on CI, but it
+ // will die soon.
+ else if (!AI.hasOneUse()) {
+ // New is the allocation instruction, pointer typed. AI is the original
+ // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
+ Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
+ AI.replaceAllUsesWith(NewCast);
+ }
+ return ReplaceInstUsesWith(CI, New);
+}
+
+
+
+/// EvaluateInDifferentType - Given an expression that
+/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
+/// insert the code to evaluate the expression.
+Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
+ bool isSigned) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
+ // If we got a constantexpr back, try to simplify it with TD info.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ C = ConstantFoldConstantExpression(CE, TD);
+ return C;
+ }
+
+ // Otherwise, it must be an instruction.
+ Instruction *I = cast<Instruction>(V);
+ Instruction *Res = 0;
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::Shl:
+ case Instruction::UDiv:
+ case Instruction::URem: {
+ Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
+ Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // If the source type of the cast is the type we're trying for then we can
+ // just return the source. There's no need to insert it because it is not
+ // new.
+ if (I->getOperand(0)->getType() == Ty)
+ return I->getOperand(0);
+
+ // Otherwise, must be the same type of cast, so just reinsert a new one.
+ // This also handles the case of zext(trunc(x)) -> zext(x).
+ Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
+ Opc == Instruction::SExt);
+ break;
+ case Instruction::Select: {
+ Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
+ Res = SelectInst::Create(I->getOperand(0), True, False);
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *OPN = cast<PHINode>(I);
+ PHINode *NPN = PHINode::Create(Ty);
+ for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
+ Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ NPN->addIncoming(V, OPN->getIncomingBlock(i));
+ }
+ Res = NPN;
+ break;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ llvm_unreachable("Unreachable!");
+ break;
+ }
+
+ Res->takeName(I);
+ return InsertNewInstBefore(Res, *I);
+}
+
+
+/// This function is a wrapper around CastInst::isEliminableCastPair. It
+/// simply extracts arguments and returns what that function returns.
+static Instruction::CastOps
+isEliminableCastPair(
+ const CastInst *CI, ///< The first cast instruction
+ unsigned opcode, ///< The opcode of the second cast instruction
+ const Type *DstTy, ///< The target type for the second cast instruction
+ TargetData *TD ///< The target data for pointer size
+) {
+
+ const Type *SrcTy = CI->getOperand(0)->getType(); // A from above
+ const Type *MidTy = CI->getType(); // B from above
+
+ // Get the opcodes of the two Cast instructions
+ Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
+ Instruction::CastOps secondOp = Instruction::CastOps(opcode);
+
+ unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
+ DstTy,
+ TD ? TD->getIntPtrType(CI->getContext()) : 0);
+
+ // We don't want to form an inttoptr or ptrtoint that converts to an integer
+ // type that differs from the pointer size.
+ if ((Res == Instruction::IntToPtr &&
+ (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) ||
+ (Res == Instruction::PtrToInt &&
+ (!TD || DstTy != TD->getIntPtrType(CI->getContext()))))
+ Res = 0;
+
+ return Instruction::CastOps(Res);
+}
+
+/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+/// results in any code being generated and is interesting to optimize out. If
+/// the cast can be eliminated by some other simple transformation, we prefer
+/// to do the simplification first.
+bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
+ const Type *Ty) {
+ // Noop casts and casts of constants should be eliminated trivially.
+ if (V->getType() == Ty || isa<Constant>(V)) return false;
+
+ // If this is another cast that can be eliminated, we prefer to have it
+ // eliminated.
+ if (const CastInst *CI = dyn_cast<CastInst>(V))
+ if (isEliminableCastPair(CI, opc, Ty, TD))
+ return false;
+
+ // If this is a vector sext from a compare, then we don't want to break the
+ // idiom where each element of the extended vector is either zero or all ones.
+ if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
+ return false;
+
+ return true;
+}
+
+
+/// @brief Implement the transforms common to all CastInst visitors.
+Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
+ Value *Src = CI.getOperand(0);
+
+ // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
+ // eliminate it now.
+ if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
+ if (Instruction::CastOps opc =
+ isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
+ // The first cast (CSrc) is eliminable so we need to fix up or replace
+ // the second cast (CI). CSrc will then have a good chance of being dead.
+ return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
+ }
+ }
+
+ // If we are casting a select then fold the cast into the select
+ if (SelectInst *SI = dyn_cast<SelectInst>(Src))
+ if (Instruction *NV = FoldOpIntoSelect(CI, SI))
+ return NV;
+
+ // If we are casting a PHI then fold the cast into the PHI
+ if (isa<PHINode>(Src)) {
+ // We don't do this if this would create a PHI node with an illegal type if
+ // it is currently legal.
+ if (!Src->getType()->isIntegerTy() ||
+ !CI.getType()->isIntegerTy() ||
+ ShouldChangeType(CI.getType(), Src->getType()))
+ if (Instruction *NV = FoldOpIntoPhi(CI))
+ return NV;
+ }
+
+ return 0;
+}
+
+/// CanEvaluateTruncated - Return true if we can evaluate the specified
+/// expression tree as type Ty instead of its larger type, and arrive with the
+/// same value. This is used by code that tries to eliminate truncates.
+///
+/// Ty will always be a type smaller than V. We should return true if trunc(V)
+/// can be computed by computing V in the smaller type. If V is an instruction,
+/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
+/// makes sense if x and y can be efficiently truncated.
+///
+/// This function works on both vectors and scalars.
+///
+static bool CanEvaluateTruncated(Value *V, const Type *Ty) {
+ // We can always evaluate constants in another type.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ const Type *OrigTy = V->getType();
+
+ // If this is an extension from the dest type, we can eliminate it, even if it
+ // has multiple uses.
+ if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // These operators can all arbitrarily be extended or truncated.
+ return CanEvaluateTruncated(I->getOperand(0), Ty) &&
+ CanEvaluateTruncated(I->getOperand(1), Ty);
+
+ case Instruction::UDiv:
+ case Instruction::URem: {
+ // UDiv and URem can be truncated if all the truncated bits are zero.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (BitWidth < OrigBitWidth) {
+ APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
+ if (MaskedValueIsZero(I->getOperand(0), Mask) &&
+ MaskedValueIsZero(I->getOperand(1), Mask)) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty) &&
+ CanEvaluateTruncated(I->getOperand(1), Ty);
+ }
+ }
+ break;
+ }
+ case Instruction::Shl:
+ // If we are truncating the result of this SHL, and if it's a shift of a
+ // constant amount, we can always perform a SHL in a smaller type.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (CI->getLimitedValue(BitWidth) < BitWidth)
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+ break;
+ case Instruction::LShr:
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+ }
+ break;
+ case Instruction::Trunc:
+ // trunc(trunc(x)) -> trunc(x)
+ return true;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
+ // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
+ return true;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateTruncated(SI->getTrueValue(), Ty) &&
+ CanEvaluateTruncated(SI->getFalseValue(), Ty);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty))
+ return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ break;
+ }
+
+ return false;
+}
+
+Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
+ if (Instruction *Result = commonCastTransforms(CI))
+ return Result;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ const Type *DestTy = CI.getType(), *SrcTy = Src->getType();
+
+ // Attempt to truncate the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateTruncated(Src, DestTy)) {
+
+ // If this cast is a truncate, evaluting in a different type always
+ // eliminates the cast, so it is always a win.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid cast: " << CI << '\n');
+ Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+ assert(Res->getType() == DestTy);
+ return ReplaceInstUsesWith(CI, Res);
+ }
+
+ // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
+ if (DestTy->getScalarSizeInBits() == 1) {
+ Constant *One = ConstantInt::get(Src->getType(), 1);
+ Src = Builder->CreateAnd(Src, One, "tmp");
+ Value *Zero = Constant::getNullValue(Src->getType());
+ return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
+ }
+
+ // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
+ Value *A = 0; ConstantInt *Cst = 0;
+ if (match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst))) &&
+ Src->hasOneUse()) {
+ // We have three types to worry about here, the type of A, the source of
+ // the truncate (MidSize), and the destination of the truncate. We know that
+ // ASize < MidSize and MidSize > ResultSize, but don't know the relation
+ // between ASize and ResultSize.
+ unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+
+ // If the shift amount is larger than the size of A, then the result is
+ // known to be zero because all the input bits got shifted out.
+ if (Cst->getZExtValue() >= ASize)
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+
+ // Since we're doing an lshr and a zero extend, and know that the shift
+ // amount is smaller than ASize, it is always safe to do the shift in A's
+ // type, then zero extend or truncate to the result.
+ Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+ }
+
+ return 0;
+}
+
+/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform) {
+ // If we are just checking for a icmp eq of a single bit and zext'ing it
+ // to an integer, then shift the bit to the appropriate place and then
+ // cast to integer to avoid the comparison.
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ const APInt &Op1CV = Op1C->getValue();
+
+ // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
+ // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
+ if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+ (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) {
+ if (!DoXform) return ICI;
+
+ Value *In = ICI->getOperand(0);
+ Value *Sh = ConstantInt::get(In->getType(),
+ In->getType()->getScalarSizeInBits()-1);
+ In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
+ if (In->getType() != CI.getType())
+ In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = Builder->CreateXor(In, One, In->getName()+".not");
+ }
+
+ return ReplaceInstUsesWith(CI, In);
+ }
+
+
+
+ // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ // zext (X == 1) to i32 --> X iff X has only the low bit set.
+ // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 0) to i32 --> X iff X has only the low bit set.
+ // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+ // This only works for EQ and NE
+ ICI->isEquality()) {
+ // If Op1C some other power of two, convert:
+ uint32_t BitWidth = Op1C->getType()->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+ ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
+
+ APInt KnownZeroMask(~KnownZero);
+ if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
+ if (!DoXform) return ICI;
+
+ bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+ if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
+ // (X&4) == 2 --> false
+ // (X&4) != 2 --> true
+ Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()),
+ isNE);
+ Res = ConstantExpr::getZExt(Res, CI.getType());
+ return ReplaceInstUsesWith(CI, Res);
+ }
+
+ uint32_t ShiftAmt = KnownZeroMask.logBase2();
+ Value *In = ICI->getOperand(0);
+ if (ShiftAmt) {
+ // Perform a logical shr by shiftamt.
+ // Insert the shift to put the result in the low bit.
+ In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
+ In->getName()+".lobit");
+ }
+
+ if ((Op1CV != 0) == isNE) { // Toggle the low bit.
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = Builder->CreateXor(In, One, "tmp");
+ }
+
+ if (CI.getType() == In->getType())
+ return ReplaceInstUsesWith(CI, In);
+ return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+ }
+ }
+ }
+
+ // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
+ // It is also profitable to transform icmp eq into not(xor(A, B)) because that
+ // may lead to additional simplifications.
+ if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
+ uint32_t BitWidth = ITy->getBitWidth();
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+
+ APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
+ APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
+ APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+ ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
+ ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
+
+ if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
+ APInt KnownBits = KnownZeroLHS | KnownOneLHS;
+ APInt UnknownBit = ~KnownBits;
+ if (UnknownBit.countPopulation() == 1) {
+ if (!DoXform) return ICI;
+
+ Value *Result = Builder->CreateXor(LHS, RHS);
+
+ // Mask off any bits that are set and won't be shifted away.
+ if (KnownOneLHS.uge(UnknownBit))
+ Result = Builder->CreateAnd(Result,
+ ConstantInt::get(ITy, UnknownBit));
+
+ // Shift the bit we're testing down to the lsb.
+ Result = Builder->CreateLShr(
+ Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1));
+ Result->takeName(ICI);
+ return ReplaceInstUsesWith(CI, Result);
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// CanEvaluateZExtd - Determine if the specified value can be computed in the
+/// specified wider type and produce the same low bits. If not, return false.
+///
+/// If this function returns true, it can also return a non-zero number of bits
+/// (in BitsToClear) which indicates that the value it computes is correct for
+/// the zero extend, but that the additional BitsToClear bits need to be zero'd
+/// out. For example, to promote something like:
+///
+/// %B = trunc i64 %A to i32
+/// %C = lshr i32 %B, 8
+/// %E = zext i32 %C to i64
+///
+/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
+/// set to 8 to indicate that the promoted value needs to have bits 24-31
+/// cleared in addition to bits 32-63. Since an 'and' will be generated to
+/// clear the top bits anyway, doing this has no extra cost.
+///
+/// This function works on both vectors and scalars.
+static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) {
+ BitsToClear = 0;
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If the input is a truncate from the destination type, we can trivially
+ // eliminate it, even if it has multiple uses.
+ // FIXME: This is currently disabled until codegen can handle this without
+ // pessimizing code, PR5997.
+ if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ unsigned Opc = I->getOpcode(), Tmp;
+ switch (Opc) {
+ case Instruction::ZExt: // zext(zext(x)) -> zext(x).
+ case Instruction::SExt: // zext(sext(x)) -> sext(x).
+ case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
+ return true;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) ||
+ !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp))
+ return false;
+ // These can all be promoted if neither operand has 'bits to clear'.
+ if (BitsToClear == 0 && Tmp == 0)
+ return true;
+
+ // If the operation is an AND/OR/XOR and the bits to clear are zero in the
+ // other side, BitsToClear is ok.
+ if (Tmp == 0 &&
+ (Opc == Instruction::And || Opc == Instruction::Or ||
+ Opc == Instruction::Xor)) {
+ // We use MaskedValueIsZero here for generality, but the case we care
+ // about the most is constant RHS.
+ unsigned VSize = V->getType()->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(1),
+ APInt::getHighBitsSet(VSize, BitsToClear)))
+ return true;
+ }
+
+ // Otherwise, we don't know how to analyze this BitsToClear case yet.
+ return false;
+
+ case Instruction::LShr:
+ // We can promote lshr(x, cst) if we can promote x. This requires the
+ // ultimate 'and' to clear out the high zero bits we're clearing out though.
+ if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+ return false;
+ BitsToClear += Amt->getZExtValue();
+ if (BitsToClear > V->getType()->getScalarSizeInBits())
+ BitsToClear = V->getType()->getScalarSizeInBits();
+ return true;
+ }
+ // Cannot promote variable LSHR.
+ return false;
+ case Instruction::Select:
+ if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp) ||
+ !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear) ||
+ // TODO: If important, we could handle the case when the BitsToClear are
+ // known zero in the disagreeing side.
+ Tmp != BitsToClear)
+ return false;
+ return true;
+
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear))
+ return false;
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp) ||
+ // TODO: If important, we could handle the case when the BitsToClear
+ // are known zero in the disagreeing input.
+ Tmp != BitsToClear)
+ return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ return false;
+ }
+}
+
+Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
+ // If this zero extend is only used by a truncate, let the truncate by
+ // eliminated before we try to optimize this zext.
+ if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+ return 0;
+
+ // If one of the common conversion will work, do it.
+ if (Instruction *Result = commonCastTransforms(CI))
+ return Result;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+ // Attempt to extend the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ unsigned BitsToClear;
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
+ assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
+ "Unreasonable BitsToClear");
+
+ // Okay, we can transform this! Insert the new expression now.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid zero extend: " << CI);
+ Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+ assert(Res->getType() == DestTy);
+
+ uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // If the high bits are already filled with zeros, just replace this
+ // cast with the result.
+ if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
+ DestBitSize-SrcBitsKept)))
+ return ReplaceInstUsesWith(CI, Res);
+
+ // We need to emit an AND to clear the high bits.
+ Constant *C = ConstantInt::get(Res->getType(),
+ APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
+ return BinaryOperator::CreateAnd(Res, C);
+ }
+
+ // If this is a TRUNC followed by a ZEXT then we are dealing with integral
+ // types and if the sizes are just right we can convert this into a logical
+ // 'and' which will be much cheaper than the pair of casts.
+ if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
+ // TODO: Subsume this into EvaluateInDifferentType.
+
+ // Get the sizes of the types involved. We know that the intermediate type
+ // will be smaller than A or C, but don't know the relation between A and C.
+ Value *A = CSrc->getOperand(0);
+ unsigned SrcSize = A->getType()->getScalarSizeInBits();
+ unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
+ // If we're actually extending zero bits, then if
+ // SrcSize < DstSize: zext(a & mask)
+ // SrcSize == DstSize: a & mask
+ // SrcSize > DstSize: trunc(a) & mask
+ if (SrcSize < DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
+ Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
+ return new ZExtInst(And, CI.getType());
+ }
+
+ if (SrcSize == DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
+ AndValue));
+ }
+ if (SrcSize > DstSize) {
+ Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");
+ APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
+ return BinaryOperator::CreateAnd(Trunc,
+ ConstantInt::get(Trunc->getType(),
+ AndValue));
+ }
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+ return transformZExtICmp(ICI, CI);
+
+ BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
+ if (SrcI && SrcI->getOpcode() == Instruction::Or) {
+ // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
+ // of the (zext icmp) will be transformed.
+ ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
+ ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
+ if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+ (transformZExtICmp(LHS, CI, false) ||
+ transformZExtICmp(RHS, CI, false))) {
+ Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
+ Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
+ return BinaryOperator::Create(Instruction::Or, LCast, RCast);
+ }
+ }
+
+ // zext(trunc(t) & C) -> (t & zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType())
+ return
+ BinaryOperator::CreateAnd(TI0,
+ ConstantExpr::getZExt(C, CI.getType()));
+ }
+
+ // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
+ if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
+ And->getOperand(1) == C)
+ if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType()) {
+ Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+ Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp");
+ return BinaryOperator::CreateXor(NewAnd, ZC);
+ }
+ }
+
+ // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1
+ Value *X;
+ if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) &&
+ match(SrcI, m_Not(m_Value(X))) &&
+ (!X->hasOneUse() || !isa<CmpInst>(X))) {
+ Value *New = Builder->CreateZExt(X, CI.getType());
+ return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
+ }
+
+ return 0;
+}
+
+/// CanEvaluateSExtd - Return true if we can take the specified value
+/// and return it as type Ty without inserting any new casts and without
+/// changing the value of the common low bits. This is used by code that tries
+/// to promote integer operations to a wider types will allow us to eliminate
+/// the extension.
+///
+/// This function works on both vectors and scalars.
+///
+static bool CanEvaluateSExtd(Value *V, const Type *Ty) {
+ assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
+ "Can't sign extend type to a smaller type");
+ // If this is a constant, it can be trivially promoted.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If this is a truncate from the dest type, we can trivially eliminate it,
+ // even if it has multiple uses.
+ // FIXME: This is currently disabled until codegen can handle this without
+ // pessimizing code, PR5997.
+ if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::SExt: // sext(sext(x)) -> sext(x)
+ case Instruction::ZExt: // sext(zext(x)) -> zext(x)
+ case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
+ return true;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // These operators can all arbitrarily be extended if their inputs can.
+ return CanEvaluateSExtd(I->getOperand(0), Ty) &&
+ CanEvaluateSExtd(I->getOperand(1), Ty);
+
+ //case Instruction::Shl: TODO
+ //case Instruction::LShr: TODO
+
+ case Instruction::Select:
+ return CanEvaluateSExtd(I->getOperand(1), Ty) &&
+ CanEvaluateSExtd(I->getOperand(2), Ty);
+
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ break;
+ }
+
+ return false;
+}
+
+Instruction *InstCombiner::visitSExt(SExtInst &CI) {
+ // If this sign extend is only used by a truncate, let the truncate by
+ // eliminated before we try to optimize this zext.
+ if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+ return 0;
+
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+ // Attempt to extend the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateSExtd(Src, DestTy)) {
+ // Okay, we can transform this! Insert the new expression now.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid sign extend: " << CI);
+ Value *Res = EvaluateInDifferentType(Src, DestTy, true);
+ assert(Res->getType() == DestTy);
+
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // If the high bits are already filled with sign bit, just replace this
+ // cast with the result.
+ if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
+ return ReplaceInstUsesWith(CI, Res);
+
+ // We need to emit a shl + ashr to do the sign extend.
+ Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+ return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"),
+ ShAmt);
+ }
+
+ // If this input is a trunc from our destination, then turn sext(trunc(x))
+ // into shifts.
+ if (TruncInst *TI = dyn_cast<TruncInst>(Src))
+ if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) {
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // We need to emit a shl + ashr to do the sign extend.
+ Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+ Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
+ return BinaryOperator::CreateAShr(Res, ShAmt);
+ }
+
+
+ // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed
+ // (x >s -1) ? -1 : 0 -> ashr x, 31 -> all ones if not signed
+ {
+ ICmpInst::Predicate Pred; Value *CmpLHS; ConstantInt *CmpRHS;
+ if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_ConstantInt(CmpRHS)))) {
+ // sext (x <s 0) to i32 --> x>>s31 true if signbit set.
+ // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear.
+ if ((Pred == ICmpInst::ICMP_SLT && CmpRHS->isZero()) ||
+ (Pred == ICmpInst::ICMP_SGT && CmpRHS->isAllOnesValue())) {
+ Value *Sh = ConstantInt::get(CmpLHS->getType(),
+ CmpLHS->getType()->getScalarSizeInBits()-1);
+ Value *In = Builder->CreateAShr(CmpLHS, Sh, CmpLHS->getName()+".lobit");
+ if (In->getType() != CI.getType())
+ In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp");
+
+ if (Pred == ICmpInst::ICMP_SGT)
+ In = Builder->CreateNot(In, In->getName()+".not");
+ return ReplaceInstUsesWith(CI, In);
+ }
+ }
+ }
+
+
+ // If the input is a shl/ashr pair of a same constant, then this is a sign
+ // extension from a smaller value. If we could trust arbitrary bitwidth
+ // integers, we could turn this into a truncate to the smaller bit and then
+ // use a sext for the whole extension. Since we don't, look deeper and check
+ // for a truncate. If the source and dest are the same type, eliminate the
+ // trunc and extend and just do shifts. For example, turn:
+ // %a = trunc i32 %i to i8
+ // %b = shl i8 %a, 6
+ // %c = ashr i8 %b, 6
+ // %d = sext i8 %c to i32
+ // into:
+ // %a = shl i32 %i, 30
+ // %d = ashr i32 %a, 30
+ Value *A = 0;
+ // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
+ ConstantInt *BA = 0, *CA = 0;
+ if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)),
+ m_ConstantInt(CA))) &&
+ BA == CA && A->getType() == CI.getType()) {
+ unsigned MidSize = Src->getType()->getScalarSizeInBits();
+ unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
+ unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
+ Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
+ A = Builder->CreateShl(A, ShAmtV, CI.getName());
+ return BinaryOperator::CreateAShr(A, ShAmtV);
+ }
+
+ return 0;
+}
+
+
+/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// in the specified FP type without changing its value.
+static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+ bool losesInfo;
+ APFloat F = CFP->getValueAPF();
+ (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
+ if (!losesInfo)
+ return ConstantFP::get(CFP->getContext(), F);
+ return 0;
+}
+
+/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// through it until we get the source value.
+static Value *LookThroughFPExtensions(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getOpcode() == Instruction::FPExt)
+ return LookThroughFPExtensions(I->getOperand(0));
+
+ // If this value is a constant, return the constant in the smallest FP type
+ // that can accurately represent it. This allows us to turn
+ // (float)((double)X+2.0) into x+2.0f.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
+ return V; // No constant folding of this.
+ // See if the value can be truncated to float and then reextended.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
+ return V;
+ if (CFP->getType()->isDoubleTy())
+ return V; // Won't shrink.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble))
+ return V;
+ // Don't try to shrink to various long double types.
+ }
+
+ return V;
+}
+
+Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
+ // smaller than the destination type, we can eliminate the truncate by doing
+ // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well
+ // as many builtins (sqrt, etc).
+ BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
+ if (OpI && OpI->hasOneUse()) {
+ switch (OpI->getOpcode()) {
+ default: break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ const Type *SrcTy = OpI->getType();
+ Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
+ Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
+ if (LHSTrunc->getType() != SrcTy &&
+ RHSTrunc->getType() != SrcTy) {
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
+ // If the source types were both smaller than the destination type of
+ // the cast, do this xform.
+ if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
+ RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
+ LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
+ RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
+ return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
+ }
+ }
+ break;
+ }
+ }
+
+ // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
+ // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
+ CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
+ if (Call && Call->getCalledFunction() &&
+ Call->getCalledFunction()->getName() == "sqrt" &&
+ Call->getNumArgOperands() == 1) {
+ CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
+ if (Arg && Arg->getOpcode() == Instruction::FPExt &&
+ CI.getType()->isFloatTy() &&
+ Call->getType()->isDoubleTy() &&
+ Arg->getType()->isDoubleTy() &&
+ Arg->getOperand(0)->getType()->isFloatTy()) {
+ Function *Callee = Call->getCalledFunction();
+ Module *M = CI.getParent()->getParent()->getParent();
+ Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
+ Callee->getAttributes(),
+ Builder->getFloatTy(),
+ Builder->getFloatTy(),
+ NULL);
+ CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
+ "sqrtfcall");
+ ret->setAttributes(Callee->getAttributes());
+
+
+ // Remove the old Call. With -fmath-errno, it won't get marked readnone.
+ Call->replaceAllUsesWith(UndefValue::get(Call->getType()));
+ EraseInstFromFunction(*Call);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFPExt(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // fptoui(uitofp(X)) --> X
+ // fptoui(sitofp(X)) --> X
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+ // i64->float->i64. This is also safe for sitofp case, because any negative
+ // 'X' value would cause an undefined result for the fptoui.
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // fptosi(sitofp(X)) --> X
+ // fptosi(uitofp(X)) --> X
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+ // i64->float->i64. This is also safe for sitofp case, because any negative
+ // 'X' value would cause an undefined result for the fptoui.
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getScalarSizeInBits() <=
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
+ // If the source integer type is not the intptr_t type for this target, do a
+ // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
+ // cast to be exposed to other transforms.
+ if (TD) {
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
+ TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreateTrunc(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()), "tmp");
+ return new IntToPtrInst(P, CI.getType());
+ }
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
+ TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreateZExt(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()), "tmp");
+ return new IntToPtrInst(P, CI.getType());
+ }
+ }
+
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ return 0;
+}
+
+/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
+Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
+ Value *Src = CI.getOperand(0);
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
+ // If casting the result of a getelementptr instruction with no offset, turn
+ // this into a cast of the original pointer!
+ if (GEP->hasAllZeroIndices()) {
+ // Changing the cast operand is usually not a good idea but it is safe
+ // here because the pointer operand is being replaced with another
+ // pointer operand so the opcode doesn't need to change.
+ Worklist.Add(GEP);
+ CI.setOperand(0, GEP->getOperand(0));
+ return &CI;
+ }
+
+ // If the GEP has a single use, and the base pointer is a bitcast, and the
+ // GEP computes a constant offset, see if we can convert these three
+ // instructions into fewer. This typically happens with unions and other
+ // non-type-safe code.
+ if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
+ GEP->hasAllConstantIndices()) {
+ // We are guaranteed to get a constant from EmitGEPOffset.
+ ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP));
+ int64_t Offset = OffsetV->getSExtValue();
+
+ // Get the base pointer input of the bitcast, and the type it points to.
+ Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
+ const Type *GEPIdxTy =
+ cast<PointerType>(OrigBase->getType())->getElementType();
+ SmallVector<Value*, 8> NewIndices;
+ if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) {
+ // If we were able to index down into an element, create the GEP
+ // and bitcast the result. This eliminates one bitcast, potentially
+ // two.
+ Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
+ Builder->CreateInBoundsGEP(OrigBase,
+ NewIndices.begin(), NewIndices.end()) :
+ Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
+ NGEP->takeName(GEP);
+
+ if (isa<BitCastInst>(CI))
+ return new BitCastInst(NGEP, CI.getType());
+ assert(isa<PtrToIntInst>(CI));
+ return new PtrToIntInst(NGEP, CI.getType());
+ }
+ }
+ }
+
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
+ // If the destination integer type is not the intptr_t type for this target,
+ // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
+ // to be exposed to other transforms.
+ if (TD) {
+ if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()),
+ "tmp");
+ return new TruncInst(P, CI.getType());
+ }
+ if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()),
+ "tmp");
+ return new ZExtInst(P, CI.getType());
+ }
+ }
+
+ return commonPointerCastTransforms(CI);
+}
+
+/// OptimizeVectorResize - This input value (which is known to have vector type)
+/// is being zero extended or truncated to the specified vector type. Try to
+/// replace it with a shuffle (and vector/vector bitcast) if possible.
+///
+/// The source and destination vector types may have different element types.
+static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
+ InstCombiner &IC) {
+ // We can only do this optimization if the output is a multiple of the input
+ // element size, or the input is a multiple of the output element size.
+ // Convert the input type to have the same element type as the output.
+ const VectorType *SrcTy = cast<VectorType>(InVal->getType());
+
+ if (SrcTy->getElementType() != DestTy->getElementType()) {
+ // The input types don't need to be identical, but for now they must be the
+ // same size. There is no specific reason we couldn't handle things like
+ // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
+ // there yet.
+ if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
+ DestTy->getElementType()->getPrimitiveSizeInBits())
+ return 0;
+
+ SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
+ InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
+ }
+
+ // Now that the element types match, get the shuffle mask and RHS of the
+ // shuffle to use, which depends on whether we're increasing or decreasing the
+ // size of the input.
+ SmallVector<Constant*, 16> ShuffleMask;
+ Value *V2;
+ const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
+
+ if (SrcTy->getNumElements() > DestTy->getNumElements()) {
+ // If we're shrinking the number of elements, just shuffle in the low
+ // elements from the input and use undef as the second shuffle input.
+ V2 = UndefValue::get(SrcTy);
+ for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
+ ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+ } else {
+ // If we're increasing the number of elements, shuffle in all of the
+ // elements from InVal and fill the rest of the result elements with zeros
+ // from a constant zero.
+ V2 = Constant::getNullValue(SrcTy);
+ unsigned SrcElts = SrcTy->getNumElements();
+ for (unsigned i = 0, e = SrcElts; i != e; ++i)
+ ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+ // The excess elements reference the first element of the zero input.
+ ShuffleMask.append(DestTy->getNumElements()-SrcElts,
+ ConstantInt::get(Int32Ty, SrcElts));
+ }
+
+ Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size());
+ return new ShuffleVectorInst(InVal, V2, Mask);
+}
+
+static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) {
+ return Value % Ty->getPrimitiveSizeInBits() == 0;
+}
+
+static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) {
+ return Value / Ty->getPrimitiveSizeInBits();
+}
+
+/// CollectInsertionElements - V is a value which is inserted into a vector of
+/// VecEltTy. Look through the value to see if we can decompose it into
+/// insertions into the vector. See the example in the comment for
+/// OptimizeIntegerToVectorInsertions for the pattern this handles.
+/// The type of V is always a non-zero multiple of VecEltTy's size.
+///
+/// This returns false if the pattern can't be matched or true if it can,
+/// filling in Elements with the elements found here.
+static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
+ SmallVectorImpl<Value*> &Elements,
+ const Type *VecEltTy) {
+ // Undef values never contribute useful bits to the result.
+ if (isa<UndefValue>(V)) return true;
+
+ // If we got down to a value of the right type, we win, try inserting into the
+ // right element.
+ if (V->getType() == VecEltTy) {
+ // Inserting null doesn't actually insert any elements.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return true;
+
+ // Fail if multiple elements are inserted into this slot.
+ if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+ return false;
+
+ Elements[ElementIndex] = V;
+ return true;
+ }
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Figure out the # elements this provides, and bitcast it or slice it up
+ // as required.
+ unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
+ VecEltTy);
+ // If the constant is the size of a vector element, we just need to bitcast
+ // it to the right type so it gets properly inserted.
+ if (NumElts == 1)
+ return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+ ElementIndex, Elements, VecEltTy);
+
+ // Okay, this is a constant that covers multiple elements. Slice it up into
+ // pieces and insert each element-sized piece into the vector.
+ if (!isa<IntegerType>(C->getType()))
+ C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
+ C->getType()->getPrimitiveSizeInBits()));
+ unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
+ const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
+ i*ElementSize));
+ Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
+ if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+ return false;
+ }
+ return true;
+ }
+
+ if (!V->hasOneUse()) return false;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return false;
+ switch (I->getOpcode()) {
+ default: return false; // Unhandled case.
+ case Instruction::BitCast:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::ZExt:
+ if (!isMultipleOfTypeSize(
+ I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
+ VecEltTy))
+ return false;
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Or:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy) &&
+ CollectInsertionElements(I->getOperand(1), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Shl: {
+ // Must be shifting by a constant that is a multiple of the element size.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+ if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
+ unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
+
+ return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
+ Elements, VecEltTy);
+ }
+
+ }
+}
+
+
+/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we
+/// may be doing shifts and ors to assemble the elements of the vector manually.
+/// Try to rip the code out and replace it with insertelements. This is to
+/// optimize code like this:
+///
+/// %tmp37 = bitcast float %inc to i32
+/// %tmp38 = zext i32 %tmp37 to i64
+/// %tmp31 = bitcast float %inc5 to i32
+/// %tmp32 = zext i32 %tmp31 to i64
+/// %tmp33 = shl i64 %tmp32, 32
+/// %ins35 = or i64 %tmp33, %tmp38
+/// %tmp43 = bitcast i64 %ins35 to <2 x float>
+///
+/// Into two insertelements that do "buildvector{%inc, %inc5}".
+static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
+ InstCombiner &IC) {
+ const VectorType *DestVecTy = cast<VectorType>(CI.getType());
+ Value *IntInput = CI.getOperand(0);
+
+ SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
+ if (!CollectInsertionElements(IntInput, 0, Elements,
+ DestVecTy->getElementType()))
+ return 0;
+
+ // If we succeeded, we know that all of the element are specified by Elements
+ // or are zero if Elements has a null entry. Recast this as a set of
+ // insertions.
+ Value *Result = Constant::getNullValue(CI.getType());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+ if (Elements[i] == 0) continue; // Unset element.
+
+ Result = IC.Builder->CreateInsertElement(Result, Elements[i],
+ IC.Builder->getInt32(i));
+ }
+
+ return Result;
+}
+
+
+/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
+/// bitcast. The various long double bitcasts can't get in here.
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+ Value *Src = CI.getOperand(0);
+ const Type *DestTy = CI.getType();
+
+ // If this is a bitcast from int to float, check to see if the int is an
+ // extraction from a vector.
+ Value *VecInput = 0;
+ // bitcast(trunc(bitcast(somevector)))
+ if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+ }
+ }
+
+ // bitcast(trunc(lshr(bitcast(somevector), cst))
+ ConstantInt *ShAmt = 0;
+ if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
+ m_ConstantInt(ShAmt)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
+ ShAmt->getZExtValue() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
+ // If the operands are integer typed then apply the integer transforms,
+ // otherwise just apply the common ones.
+ Value *Src = CI.getOperand(0);
+ const Type *SrcTy = Src->getType();
+ const Type *DestTy = CI.getType();
+
+ // Get rid of casts from one type to the same type. These are useless and can
+ // be replaced by the operand.
+ if (DestTy == Src->getType())
+ return ReplaceInstUsesWith(CI, Src);
+
+ if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
+ const PointerType *SrcPTy = cast<PointerType>(SrcTy);
+ const Type *DstElTy = DstPTy->getElementType();
+ const Type *SrcElTy = SrcPTy->getElementType();
+
+ // If the address spaces don't match, don't eliminate the bitcast, which is
+ // required for changing types.
+ if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
+ return 0;
+
+ // If we are casting a alloca to a pointer to a type of the same
+ // size, rewrite the allocation instruction to allocate the "right" type.
+ // There is no need to modify malloc calls because it is their bitcast that
+ // needs to be cleaned up.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
+ if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+ return V;
+
+ // If the source and destination are pointers, and this cast is equivalent
+ // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
+ // This can enhance SROA and other transforms that want type-safe pointers.
+ Constant *ZeroUInt =
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
+ unsigned NumZeros = 0;
+ while (SrcElTy != DstElTy &&
+ isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
+ SrcElTy->getNumContainedTypes() /* not "{}" */) {
+ SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+ ++NumZeros;
+ }
+
+ // If we found a path from the src to dest, create the getelementptr now.
+ if (SrcElTy == DstElTy) {
+ SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+ return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(),"",
+ ((Instruction*)NULL));
+ }
+ }
+
+ // Try to optimize int -> float bitcasts.
+ if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
+ if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+ return I;
+
+ if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
+ if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
+ Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
+ return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+ // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
+ }
+
+ if (isa<IntegerType>(SrcTy)) {
+ // If this is a cast from an integer to vector, check to see if the input
+ // is a trunc or zext of a bitcast from vector. If so, we can replace all
+ // the casts with a shuffle and (potentially) a bitcast.
+ if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
+ CastInst *SrcCast = cast<CastInst>(Src);
+ if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+ if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+ if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+ cast<VectorType>(DestTy), *this))
+ return I;
+ }
+
+ // If the input is an 'or' instruction, we may be doing shifts and ors to
+ // assemble the elements of the vector manually. Try to rip the code out
+ // and replace it with insertelements.
+ if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this))
+ return ReplaceInstUsesWith(CI, V);
+ }
+ }
+
+ if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
+ if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) {
+ Value *Elem =
+ Builder->CreateExtractElement(Src,
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+ return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+ }
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
+ // Okay, we have (bitcast (shuffle ..)). Check to see if this is
+ // a bitcast to a vector with the same # elts.
+ if (SVI->hasOneUse() && DestTy->isVectorTy() &&
+ cast<VectorType>(DestTy)->getNumElements() ==
+ SVI->getType()->getNumElements() &&
+ SVI->getType()->getNumElements() ==
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
+ BitCastInst *Tmp;
+ // If either of the operands is a cast from CI.getType(), then
+ // evaluating the shuffle in the casted destination's type will allow
+ // us to eliminate at least one cast.
+ if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
+ Tmp->getOperand(0)->getType() == DestTy) ||
+ ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
+ Tmp->getOperand(0)->getType() == DestTy)) {
+ Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
+ Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
+ // Return a new shuffle vector. Use the same element ID's, as we
+ // know the vector types match #elts.
+ return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
+ }
+ }
+ }
+
+ if (SrcTy->isPointerTy())
+ return commonPointerCastTransforms(CI);
+ return commonCastTransforms(CI);
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
new file mode 100644
index 0000000..d7e2b72
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -0,0 +1,2447 @@
+//===- InstCombineCompares.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitICmp and visitFCmp functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// AddOne - Add one to a ConstantInt
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a ConstantInt
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
+}
+
+static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
+ return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
+}
+
+static bool HasAddOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
+ if (IsSigned)
+ if (In2->getValue().isNegative())
+ return Result->getValue().sgt(In1->getValue());
+ else
+ return Result->getValue().slt(In1->getValue());
+ else
+ return Result->getValue().ult(In1->getValue());
+}
+
+/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
+/// overflowed for this type.
+static bool AddWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getAdd(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
+ if (HasAddOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasAddOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
+static bool HasSubOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
+ if (IsSigned)
+ if (In2->getValue().isNegative())
+ return Result->getValue().slt(In1->getValue());
+ else
+ return Result->getValue().sgt(In1->getValue());
+ else
+ return Result->getValue().ugt(In1->getValue());
+}
+
+/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// overflowed for this type.
+static bool SubWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getSub(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
+ if (HasSubOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasSubOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
+/// isSignBitCheck - Given an exploded icmp instruction, return true if the
+/// comparison only checks the sign bit. If it only checks the sign bit, set
+/// TrueIfSigned if the result of the comparison is true when the input value is
+/// signed.
+static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
+ bool &TrueIfSigned) {
+ switch (pred) {
+ case ICmpInst::ICMP_SLT: // True if LHS s< 0
+ TrueIfSigned = true;
+ return RHS->isZero();
+ case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1
+ TrueIfSigned = true;
+ return RHS->isAllOnesValue();
+ case ICmpInst::ICMP_SGT: // True if LHS s> -1
+ TrueIfSigned = false;
+ return RHS->isAllOnesValue();
+ case ICmpInst::ICMP_UGT:
+ // True if LHS u> RHS and RHS == high-bit-mask - 1
+ TrueIfSigned = true;
+ return RHS->getValue() ==
+ APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits());
+ case ICmpInst::ICMP_UGE:
+ // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
+ TrueIfSigned = true;
+ return RHS->getValue().isSignBit();
+ default:
+ return false;
+ }
+}
+
+// isHighOnes - Return true if the constant is of the form 1+0+.
+// This is the same as lowones(~X).
+static bool isHighOnes(const ConstantInt *CI) {
+ return (~CI->getValue() + 1).isPowerOf2();
+}
+
+/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a
+/// set of known zero and one bits, compute the maximum and minimum values that
+/// could have the specified known zero and known one bits, returning them in
+/// min/max.
+static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
+ const APInt& KnownOne,
+ APInt& Min, APInt& Max) {
+ assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+ KnownZero.getBitWidth() == Min.getBitWidth() &&
+ KnownZero.getBitWidth() == Max.getBitWidth() &&
+ "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+ APInt UnknownBits = ~(KnownZero|KnownOne);
+
+ // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
+ // bit if it is unknown.
+ Min = KnownOne;
+ Max = KnownOne|UnknownBits;
+
+ if (UnknownBits.isNegative()) { // Sign bit is unknown
+ Min.set(Min.getBitWidth()-1);
+ Max.clear(Max.getBitWidth()-1);
+ }
+}
+
+// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and
+// a set of known zero and one bits, compute the maximum and minimum values that
+// could have the specified known zero and known one bits, returning them in
+// min/max.
+static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
+ const APInt &KnownOne,
+ APInt &Min, APInt &Max) {
+ assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+ KnownZero.getBitWidth() == Min.getBitWidth() &&
+ KnownZero.getBitWidth() == Max.getBitWidth() &&
+ "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+ APInt UnknownBits = ~(KnownZero|KnownOne);
+
+ // The minimum value is when the unknown bits are all zeros.
+ Min = KnownOne;
+ // The maximum value is when the unknown bits are all ones.
+ Max = KnownOne|UnknownBits;
+}
+
+
+
+/// FoldCmpLoadFromIndexedGlobal - Called we see this pattern:
+/// cmp pred (load (gep GV, ...)), cmpcst
+/// where GV is a global variable with a constant initializer. Try to simplify
+/// this into some simple computation that does not need the load. For example
+/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
+///
+/// If AndCst is non-null, then the loaded value is masked with that constant
+/// before doing the comparison. This handles cases like "A[i]&4 == 0".
+Instruction *InstCombiner::
+FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
+ CmpInst &ICI, ConstantInt *AndCst) {
+ // We need TD information to know the pointer size unless this is inbounds.
+ if (!GEP->isInBounds() && TD == 0) return 0;
+
+ ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (Init == 0 || Init->getNumOperands() > 1024) return 0;
+
+ // There are many forms of this optimization we can handle, for now, just do
+ // the simple index into a single-dimensional array.
+ //
+ // Require: GEP GV, 0, i {{, constant indices}}
+ if (GEP->getNumOperands() < 3 ||
+ !isa<ConstantInt>(GEP->getOperand(1)) ||
+ !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
+ isa<Constant>(GEP->getOperand(2)))
+ return 0;
+
+ // Check that indices after the variable are constants and in-range for the
+ // type they index. Collect the indices. This is typically for arrays of
+ // structs.
+ SmallVector<unsigned, 4> LaterIndices;
+
+ const Type *EltTy = cast<ArrayType>(Init->getType())->getElementType();
+ for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
+ ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (Idx == 0) return 0; // Variable index.
+
+ uint64_t IdxVal = Idx->getZExtValue();
+ if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index.
+
+ if (const StructType *STy = dyn_cast<StructType>(EltTy))
+ EltTy = STy->getElementType(IdxVal);
+ else if (const ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
+ if (IdxVal >= ATy->getNumElements()) return 0;
+ EltTy = ATy->getElementType();
+ } else {
+ return 0; // Unknown type.
+ }
+
+ LaterIndices.push_back(IdxVal);
+ }
+
+ enum { Overdefined = -3, Undefined = -2 };
+
+ // Variables for our state machines.
+
+ // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
+ // "i == 47 | i == 87", where 47 is the first index the condition is true for,
+ // and 87 is the second (and last) index. FirstTrueElement is -2 when
+ // undefined, otherwise set to the first true element. SecondTrueElement is
+ // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
+ int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
+
+ // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
+ // form "i != 47 & i != 87". Same state transitions as for true elements.
+ int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
+
+ /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
+ /// define a state machine that triggers for ranges of values that the index
+ /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
+ /// This is -2 when undefined, -3 when overdefined, and otherwise the last
+ /// index in the range (inclusive). We use -2 for undefined here because we
+ /// use relative comparisons and don't want 0-1 to match -1.
+ int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
+
+ // MagicBitvector - This is a magic bitvector where we set a bit if the
+ // comparison is true for element 'i'. If there are 64 elements or less in
+ // the array, this will fully represent all the comparison results.
+ uint64_t MagicBitvector = 0;
+
+
+ // Scan the array and see if one of our patterns matches.
+ Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
+ for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
+ Constant *Elt = Init->getOperand(i);
+
+ // If this is indexing an array of structures, get the structure element.
+ if (!LaterIndices.empty())
+ Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(),
+ LaterIndices.size());
+
+ // If the element is masked, handle it.
+ if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
+
+ // Find out if the comparison would be true or false for the i'th element.
+ Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
+ CompareRHS, TD);
+ // If the result is undef for this element, ignore it.
+ if (isa<UndefValue>(C)) {
+ // Extend range state machines to cover this element in case there is an
+ // undef in the middle of the range.
+ if (TrueRangeEnd == (int)i-1)
+ TrueRangeEnd = i;
+ if (FalseRangeEnd == (int)i-1)
+ FalseRangeEnd = i;
+ continue;
+ }
+
+ // If we can't compute the result for any of the elements, we have to give
+ // up evaluating the entire conditional.
+ if (!isa<ConstantInt>(C)) return 0;
+
+ // Otherwise, we know if the comparison is true or false for this element,
+ // update our state machines.
+ bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
+
+ // State machine for single/double/range index comparison.
+ if (IsTrueForElt) {
+ // Update the TrueElement state machine.
+ if (FirstTrueElement == Undefined)
+ FirstTrueElement = TrueRangeEnd = i; // First true element.
+ else {
+ // Update double-compare state machine.
+ if (SecondTrueElement == Undefined)
+ SecondTrueElement = i;
+ else
+ SecondTrueElement = Overdefined;
+
+ // Update range state machine.
+ if (TrueRangeEnd == (int)i-1)
+ TrueRangeEnd = i;
+ else
+ TrueRangeEnd = Overdefined;
+ }
+ } else {
+ // Update the FalseElement state machine.
+ if (FirstFalseElement == Undefined)
+ FirstFalseElement = FalseRangeEnd = i; // First false element.
+ else {
+ // Update double-compare state machine.
+ if (SecondFalseElement == Undefined)
+ SecondFalseElement = i;
+ else
+ SecondFalseElement = Overdefined;
+
+ // Update range state machine.
+ if (FalseRangeEnd == (int)i-1)
+ FalseRangeEnd = i;
+ else
+ FalseRangeEnd = Overdefined;
+ }
+ }
+
+
+ // If this element is in range, update our magic bitvector.
+ if (i < 64 && IsTrueForElt)
+ MagicBitvector |= 1ULL << i;
+
+ // If all of our states become overdefined, bail out early. Since the
+ // predicate is expensive, only check it every 8 elements. This is only
+ // really useful for really huge arrays.
+ if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
+ SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
+ FalseRangeEnd == Overdefined)
+ return 0;
+ }
+
+ // Now that we've scanned the entire array, emit our new comparison(s). We
+ // order the state machines in complexity of the generated code.
+ Value *Idx = GEP->getOperand(2);
+
+ // If the index is larger than the pointer size of the target, truncate the
+ // index down like the GEP would do implicitly. We don't have to do this for
+ // an inbounds GEP because the index can't be out of range.
+ if (!GEP->isInBounds() &&
+ Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
+ Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
+
+ // If the comparison is only true for one or two elements, emit direct
+ // comparisons.
+ if (SecondTrueElement != Overdefined) {
+ // None true -> false.
+ if (FirstTrueElement == Undefined)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext()));
+
+ Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
+
+ // True for one element -> 'i == 47'.
+ if (SecondTrueElement == Undefined)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
+
+ // True for two elements -> 'i == 47 | i == 72'.
+ Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx);
+ Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
+ Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx);
+ return BinaryOperator::CreateOr(C1, C2);
+ }
+
+ // If the comparison is only false for one or two elements, emit direct
+ // comparisons.
+ if (SecondFalseElement != Overdefined) {
+ // None false -> true.
+ if (FirstFalseElement == Undefined)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext()));
+
+ Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
+
+ // False for one element -> 'i != 47'.
+ if (SecondFalseElement == Undefined)
+ return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
+
+ // False for two elements -> 'i != 47 & i != 72'.
+ Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx);
+ Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
+ Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx);
+ return BinaryOperator::CreateAnd(C1, C2);
+ }
+
+ // If the comparison can be replaced with a range comparison for the elements
+ // where it is true, emit the range check.
+ if (TrueRangeEnd != Overdefined) {
+ assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
+
+ // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
+ if (FirstTrueElement) {
+ Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
+ Idx = Builder->CreateAdd(Idx, Offs);
+ }
+
+ Value *End = ConstantInt::get(Idx->getType(),
+ TrueRangeEnd-FirstTrueElement+1);
+ return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
+ }
+
+ // False range check.
+ if (FalseRangeEnd != Overdefined) {
+ assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
+ // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
+ if (FirstFalseElement) {
+ Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
+ Idx = Builder->CreateAdd(Idx, Offs);
+ }
+
+ Value *End = ConstantInt::get(Idx->getType(),
+ FalseRangeEnd-FirstFalseElement);
+ return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
+ }
+
+
+ // If a 32-bit or 64-bit magic bitvector captures the entire comparison state
+ // of this load, replace it with computation that does:
+ // ((magic_cst >> i) & 1) != 0
+ if (Init->getNumOperands() <= 32 ||
+ (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) {
+ const Type *Ty;
+ if (Init->getNumOperands() <= 32)
+ Ty = Type::getInt32Ty(Init->getContext());
+ else
+ Ty = Type::getInt64Ty(Init->getContext());
+ Value *V = Builder->CreateIntCast(Idx, Ty, false);
+ V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
+ V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
+ return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+ }
+
+ return 0;
+}
+
+
+/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
+/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we
+/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can
+/// be complex, and scales are involved. The above expression would also be
+/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
+/// This later form is less amenable to optimization though, and we are allowed
+/// to generate the first by knowing that pointer arithmetic doesn't overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+///
+static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
+ InstCombiner &IC) {
+ TargetData &TD = *IC.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+
+ // Check to see if this gep only has a single variable index. If so, and if
+ // any constant indices are a multiple of its scale, then we can compute this
+ // in terms of the scale of the variable index. For example, if the GEP
+ // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+ // because the expression will cross zero at the same point.
+ unsigned i, e = GEP->getNumOperands();
+ int64_t Offset = 0;
+ for (i = 1; i != e; ++i, ++GTI) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ } else {
+ // Found our variable index.
+ break;
+ }
+ }
+
+ // If there are no variable indices, we must have a constant offset, just
+ // evaluate it the general way.
+ if (i == e) return 0;
+
+ Value *VariableIdx = GEP->getOperand(i);
+ // Determine the scale factor of the variable element. For example, this is
+ // 4 if the variable index is into an array of i32.
+ uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());
+
+ // Verify that there are no other variable indices. If so, emit the hard way.
+ for (++i, ++GTI; i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!CI) return 0;
+
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ }
+
+ // Okay, we know we have a single variable index, which must be a
+ // pointer/array/vector index. If there is no offset, life is simple, return
+ // the index.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ if (Offset == 0) {
+ // Cast to intptrty in case a truncation occurs. If an extension is needed,
+ // we don't need to bother extending: the extension won't affect where the
+ // computation crosses zero.
+ if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
+ VariableIdx = new TruncInst(VariableIdx,
+ TD.getIntPtrType(VariableIdx->getContext()),
+ VariableIdx->getName(), &I);
+ return VariableIdx;
+ }
+
+ // Otherwise, there is an index. The computation we will do will be modulo
+ // the pointer size, so get it.
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ Offset &= PtrSizeMask;
+ VariableScale &= PtrSizeMask;
+
+ // To do this transformation, any constant index must be a multiple of the
+ // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
+ // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
+ // multiple of the variable scale.
+ int64_t NewOffs = Offset / (int64_t)VariableScale;
+ if (Offset != NewOffs*(int64_t)VariableScale)
+ return 0;
+
+ // Okay, we can do this evaluation. Start by converting the index to intptr.
+ const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ if (VariableIdx->getType() != IntPtrTy)
+ VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
+ true /*SExt*/,
+ VariableIdx->getName(), &I);
+ Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+ return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
+}
+
+/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
+/// else. At this point we know that the GEP is on the LHS of the comparison.
+Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond,
+ Instruction &I) {
+ // Look through bitcasts.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
+ RHS = BCI->getOperand(0);
+
+ Value *PtrBase = GEPLHS->getOperand(0);
+ if (TD && PtrBase == RHS && GEPLHS->isInBounds()) {
+ // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
+ // This transformation (ignoring the base and scales) is valid because we
+ // know pointers can't overflow since the gep is inbounds. See if we can
+ // output an optimized form.
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);
+
+ // If not, synthesize the offset the hard way.
+ if (Offset == 0)
+ Offset = EmitGEPOffset(GEPLHS);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
+ Constant::getNullValue(Offset->getType()));
+ } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
+ // If the base pointers are different, but the indices are the same, just
+ // compare the base pointer.
+ if (PtrBase != GEPRHS->getOperand(0)) {
+ bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
+ IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
+ GEPRHS->getOperand(0)->getType();
+ if (IndicesTheSame)
+ for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+ if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+ IndicesTheSame = false;
+ break;
+ }
+
+ // If all indices are the same, just compare the base pointers.
+ if (IndicesTheSame)
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
+ GEPLHS->getOperand(0), GEPRHS->getOperand(0));
+
+ // Otherwise, the base pointers are different and the indices are
+ // different, bail out.
+ return 0;
+ }
+
+ // If one of the GEPs has all zero indices, recurse.
+ bool AllZeros = true;
+ for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(GEPLHS->getOperand(i)) ||
+ !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) {
+ AllZeros = false;
+ break;
+ }
+ if (AllZeros)
+ return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
+ ICmpInst::getSwappedPredicate(Cond), I);
+
+ // If the other GEP has all zero indices, recurse.
+ AllZeros = true;
+ for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(GEPRHS->getOperand(i)) ||
+ !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) {
+ AllZeros = false;
+ break;
+ }
+ if (AllZeros)
+ return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
+
+ if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
+ // If the GEPs only differ by one index, compare it.
+ unsigned NumDifferences = 0; // Keep track of # differences.
+ unsigned DiffOperand = 0; // The operand that differs.
+ for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+ if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+ if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
+ GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
+ // Irreconcilable differences.
+ NumDifferences = 2;
+ break;
+ } else {
+ if (NumDifferences++) break;
+ DiffOperand = i;
+ }
+ }
+
+ if (NumDifferences == 0) // SAME GEP?
+ return ReplaceInstUsesWith(I, // No comparison is needed here.
+ ConstantInt::get(Type::getInt1Ty(I.getContext()),
+ ICmpInst::isTrueWhenEqual(Cond)));
+
+ else if (NumDifferences == 1) {
+ Value *LHSV = GEPLHS->getOperand(DiffOperand);
+ Value *RHSV = GEPRHS->getOperand(DiffOperand);
+ // Make sure we do a signed comparison here.
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
+ }
+ }
+
+ // Only lower this if the icmp is the only user of the GEP or if we expect
+ // the result to fold to a constant!
+ if (TD &&
+ (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+ (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
+ // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2)
+ Value *L = EmitGEPOffset(GEPLHS);
+ Value *R = EmitGEPOffset(GEPRHS);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
+ }
+ }
+ return 0;
+}
+
+/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
+Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
+ Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred,
+ Value *TheAdd) {
+ // If we have X+0, exit early (simplifying logic below) and let it get folded
+ // elsewhere. icmp X+0, X -> icmp X, X
+ if (CI->isZero()) {
+ bool isTrue = ICmpInst::isTrueWhenEqual(Pred);
+ return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+ }
+
+ // (X+4) == X -> false.
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+
+ // (X+4) != X -> true.
+ if (Pred == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+
+ // If this is an instruction (as opposed to constantexpr) get NUW/NSW info.
+ bool isNUW = false, isNSW = false;
+ if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) {
+ isNUW = Add->hasNoUnsignedWrap();
+ isNSW = Add->hasNoSignedWrap();
+ }
+
+ // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
+ // so the values can never be equal. Similiarly for all other "or equals"
+ // operators.
+
+ // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255
+ // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253
+ // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
+ if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
+ // If this is an NUW add, then this is always false.
+ if (isNUW)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+
+ Value *R =
+ ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
+ return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
+ }
+
+ // (X+1) >u X --> X <u (0-1) --> X != 255
+ // (X+2) >u X --> X <u (0-2) --> X <u 254
+ // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
+ // If this is an NUW add, then this is always true.
+ if (isNUW)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+ return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
+ }
+
+ unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
+ ConstantInt *SMax = ConstantInt::get(X->getContext(),
+ APInt::getSignedMaxValue(BitWidth));
+
+ // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127
+ // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125
+ // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0
+ // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1
+ // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
+ // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
+ // If this is an NSW add, then we have two cases: if the constant is
+ // positive, then this is always false, if negative, this is always true.
+ if (isNSW) {
+ bool isTrue = CI->getValue().isNegative();
+ return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+ }
+
+ return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
+ }
+
+ // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
+ // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
+ // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
+ // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
+ // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126
+ // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
+
+ // If this is an NSW add, then we have two cases: if the constant is
+ // positive, then this is always true, if negative, this is always false.
+ if (isNSW) {
+ bool isTrue = !CI->getValue().isNegative();
+ return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+ }
+
+ assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
+ Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
+ return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
+}
+
+/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS
+/// and CmpRHS are both known to be integer constants.
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS) {
+ ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
+ const APInt &CmpRHSV = CmpRHS->getValue();
+
+ // FIXME: If the operand types don't match the type of the divide
+ // then don't attempt this transform. The code below doesn't have the
+ // logic to deal with a signed divide and an unsigned compare (and
+ // vice versa). This is because (x /s C1) <s C2 produces different
+ // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
+ // (x /u C1) <u C2. Simply casting the operands and result won't
+ // work. :( The if statement below tests that condition and bails
+ // if it finds it.
+ bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
+ if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
+ return 0;
+ if (DivRHS->isZero())
+ return 0; // The ProdOV computation fails on divide by zero.
+ if (DivIsSigned && DivRHS->isAllOnesValue())
+ return 0; // The overflow computation also screws up here
+ if (DivRHS->isOne())
+ return 0; // Not worth bothering, and eliminates some funny cases
+ // with INT_MIN.
+
+ // Compute Prod = CI * DivRHS. We are essentially solving an equation
+ // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
+ // C2 (CI). By solving for X we can turn this into a range check
+ // instead of computing a divide.
+ Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
+
+ // Determine if the product overflows by seeing if the product is
+ // not equal to the divide. Make sure we do the same kind of divide
+ // as in the LHS instruction that we're folding.
+ bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+ ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+
+ // Get the ICmp opcode
+ ICmpInst::Predicate Pred = ICI.getPredicate();
+
+ // Figure out the interval that is being checked. For example, a comparison
+ // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
+ // Compute this interval based on the constants involved and the signedness of
+ // the compare/divide. This computes a half-open interval, keeping track of
+ // whether either value in the interval overflows. After analysis each
+ // overflow variable is set to 0 if it's corresponding bound variable is valid
+ // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
+ int LoOverflow = 0, HiOverflow = 0;
+ Constant *LoBound = 0, *HiBound = 0;
+
+ if (!DivIsSigned) { // udiv
+ // e.g. X/5 op 3 --> [15, 20)
+ LoBound = Prod;
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false);
+ } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
+ if (CmpRHSV == 0) { // (X / pos) op 0
+ // Can't overflow. e.g. X/2 op 0 --> [-1, 2)
+ LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
+ HiBound = DivRHS;
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
+ LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true);
+ } else { // (X / pos) op neg
+ // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
+ HiBound = AddOne(Prod);
+ LoOverflow = HiOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow) {
+ ConstantInt* DivNeg =
+ cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+ LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
+ }
+ }
+ } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
+ if (CmpRHSV == 0) { // (X / neg) op 0
+ // e.g. X/-5 op 0 --> [-4, 5)
+ LoBound = AddOne(DivRHS);
+ HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+ if (HiBound == DivRHS) { // -INTMIN = INTMIN
+ HiOverflow = 1; // [INTMIN+1, overflow)
+ HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN
+ }
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
+ // e.g. X/-5 op 3 --> [-19, -14)
+ HiBound = AddOne(Prod);
+ HiOverflow = LoOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow)
+ LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0;
+ } else { // (X / neg) op neg
+ LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
+ LoOverflow = HiOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true);
+ }
+
+ // Dividing by a negative swaps the condition. LT <-> GT
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ Value *X = DivI->getOperand(0);
+ switch (Pred) {
+ default: llvm_unreachable("Unhandled icmp opcode!");
+ case ICmpInst::ICMP_EQ:
+ if (LoOverflow && HiOverflow)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, LoBound);
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, HiBound);
+ return ReplaceInstUsesWith(ICI,
+ InsertRangeTest(X, LoBound, HiBound, DivIsSigned,
+ true));
+ case ICmpInst::ICMP_NE:
+ if (LoOverflow && HiOverflow)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, LoBound);
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, HiBound);
+ return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+ DivIsSigned, false));
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ if (LoOverflow == +1) // Low bound is greater than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (LoOverflow == -1) // Low bound is less than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ return new ICmpInst(Pred, X, LoBound);
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ if (HiOverflow == +1) // High bound greater than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ else if (HiOverflow == -1) // High bound less than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+ else
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+ }
+}
+
+
+/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
+///
+Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+ Instruction *LHSI,
+ ConstantInt *RHS) {
+ const APInt &RHSV = RHS->getValue();
+
+ switch (LHSI->getOpcode()) {
+ case Instruction::Trunc:
+ if (ICI.isEquality() && LHSI->hasOneUse()) {
+ // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
+ // of the high bits truncated out of x are known.
+ unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
+ SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits));
+ APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
+ ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne);
+
+ // If all the high bits are known, we can do this xform.
+ if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
+ // Pull in the high bits from known-ones set.
+ APInt NewRHS(RHS->getValue());
+ NewRHS.zext(SrcBits);
+ NewRHS |= KnownOne;
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(), NewRHS));
+ }
+ }
+ break;
+
+ case Instruction::Xor: // (icmp pred (xor X, XorCST), CI)
+ if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ // If this is a comparison that tests the signbit (X < 0) or (x > -1),
+ // fold the xor.
+ if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
+ (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
+ Value *CompareVal = LHSI->getOperand(0);
+
+ // If the sign bit of the XorCST is not set, there is no change to
+ // the operation, just stop using the Xor.
+ if (!XorCST->getValue().isNegative()) {
+ ICI.setOperand(0, CompareVal);
+ Worklist.Add(LHSI);
+ return &ICI;
+ }
+
+ // Was the old condition true if the operand is positive?
+ bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
+
+ // If so, the new one isn't.
+ isTrueIfPositive ^= true;
+
+ if (isTrueIfPositive)
+ return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
+ SubOne(RHS));
+ else
+ return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal,
+ AddOne(RHS));
+ }
+
+ if (LHSI->hasOneUse()) {
+ // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
+ if (!ICI.isEquality() && XorCST->getValue().isSignBit()) {
+ const APInt &SignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSigned()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),
+ RHSV ^ SignBit));
+ }
+
+ // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
+ if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) {
+ const APInt &NotSignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSigned()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ Pred = ICI.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),
+ RHSV ^ NotSignBit));
+ }
+ }
+ }
+ break;
+ case Instruction::And: // (icmp pred (and X, AndCST), RHS)
+ if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
+ LHSI->getOperand(0)->hasOneUse()) {
+ ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1));
+
+ // If the LHS is an AND of a truncating cast, we can widen the
+ // and/compare to be the input width without changing the value
+ // produced, eliminating a cast.
+ if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
+ // We can do this transformation if either the AND constant does not
+ // have its sign bit set or if it is an equality comparison.
+ // Extending a relational comparison when we're checking the sign
+ // bit would not work.
+ if (Cast->hasOneUse() &&
+ (ICI.isEquality() ||
+ (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) {
+ uint32_t BitWidth =
+ cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
+ APInt NewCST = AndCST->getValue();
+ NewCST.zext(BitWidth);
+ APInt NewCI = RHSV;
+ NewCI.zext(BitWidth);
+ Value *NewAnd =
+ Builder->CreateAnd(Cast->getOperand(0),
+ ConstantInt::get(ICI.getContext(), NewCST),
+ LHSI->getName());
+ return new ICmpInst(ICI.getPredicate(), NewAnd,
+ ConstantInt::get(ICI.getContext(), NewCI));
+ }
+ }
+
+ // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
+ // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This
+ // happens a LOT in code produced by the C front-end, for bitfield
+ // access.
+ BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
+ if (Shift && !Shift->isShift())
+ Shift = 0;
+
+ ConstantInt *ShAmt;
+ ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
+ const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift.
+ const Type *AndTy = AndCST->getType(); // Type of the and.
+
+ // We can fold this as long as we can't shift unknown bits
+ // into the mask. This can only happen with signed shift
+ // rights, as they sign-extend.
+ if (ShAmt) {
+ bool CanFold = Shift->isLogicalShift();
+ if (!CanFold) {
+ // To test for the bad case of the signed shr, see if any
+ // of the bits shifted in could be tested after the mask.
+ uint32_t TyBits = Ty->getPrimitiveSizeInBits();
+ int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
+
+ uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
+ if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
+ AndCST->getValue()) == 0)
+ CanFold = true;
+ }
+
+ if (CanFold) {
+ Constant *NewCst;
+ if (Shift->getOpcode() == Instruction::Shl)
+ NewCst = ConstantExpr::getLShr(RHS, ShAmt);
+ else
+ NewCst = ConstantExpr::getShl(RHS, ShAmt);
+
+ // Check to see if we are shifting out any of the bits being
+ // compared.
+ if (ConstantExpr::get(Shift->getOpcode(),
+ NewCst, ShAmt) != RHS) {
+ // If we shifted bits out, the fold is not going to work out.
+ // As a special case, check to see if this means that the
+ // result is always true or false now.
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::getFalse(ICI.getContext()));
+ if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::getTrue(ICI.getContext()));
+ } else {
+ ICI.setOperand(1, NewCst);
+ Constant *NewAndCST;
+ if (Shift->getOpcode() == Instruction::Shl)
+ NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
+ else
+ NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
+ LHSI->setOperand(1, NewAndCST);
+ LHSI->setOperand(0, Shift->getOperand(0));
+ Worklist.Add(Shift); // Shift is dead.
+ return &ICI;
+ }
+ }
+ }
+
+ // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is
+ // preferable because it allows the C<<Y expression to be hoisted out
+ // of a loop if Y is invariant and X is not.
+ if (Shift && Shift->hasOneUse() && RHSV == 0 &&
+ ICI.isEquality() && !Shift->isArithmeticShift() &&
+ !isa<Constant>(Shift->getOperand(0))) {
+ // Compute C << Y.
+ Value *NS;
+ if (Shift->getOpcode() == Instruction::LShr) {
+ NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp");
+ } else {
+ // Insert a logical shift.
+ NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp");
+ }
+
+ // Compute X & (C << Y).
+ Value *NewAnd =
+ Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
+
+ ICI.setOperand(0, NewAnd);
+ return &ICI;
+ }
+ }
+
+ // Try to optimize things like "A[i]&42 == 0" to index computations.
+ if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) {
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) {
+ ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1));
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C))
+ return Res;
+ }
+ }
+ break;
+
+ case Instruction::Or: {
+ if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse())
+ break;
+ Value *P, *Q;
+ if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
+ // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
+ // -> and (icmp eq P, null), (icmp eq Q, null).
+
+ Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P,
+ Constant::getNullValue(P->getType()));
+ Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q,
+ Constant::getNullValue(Q->getType()));
+ Instruction *Op;
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ Op = BinaryOperator::CreateAnd(ICIP, ICIQ);
+ else
+ Op = BinaryOperator::CreateOr(ICIP, ICIQ);
+ return Op;
+ }
+ break;
+ }
+
+ case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
+ ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!ShAmt) break;
+
+ uint32_t TypeBits = RHSV.getBitWidth();
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ if (ShAmt->uge(TypeBits))
+ break;
+
+ if (ICI.isEquality()) {
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ Constant *Comp =
+ ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt),
+ ShAmt);
+ if (Comp != RHS) {// Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst =
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ if (LHSI->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ Constant *Mask =
+ ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits,
+ TypeBits-ShAmtVal));
+
+ Value *And =
+ Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And,
+ ConstantInt::get(ICI.getContext(),
+ RHSV.lshr(ShAmtVal)));
+ }
+ }
+
+ // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
+ bool TrueIfSigned = false;
+ if (LHSI->hasOneUse() &&
+ isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
+ // (X << 31) <s 0 --> (X&1) != 0
+ Constant *Mask = ConstantInt::get(ICI.getContext(), APInt(TypeBits, 1) <<
+ (TypeBits-ShAmt->getZExtValue()-1));
+ Value *And =
+ Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
+ return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
+ And, Constant::getNullValue(And->getType()));
+ }
+ break;
+ }
+
+ case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
+ case Instruction::AShr: {
+ // Only handle equality comparisons of shift-by-constant.
+ ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!ShAmt || !ICI.isEquality()) break;
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ uint32_t TypeBits = RHSV.getBitWidth();
+ if (ShAmt->uge(TypeBits))
+ break;
+
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ APInt Comp = RHSV << ShAmtVal;
+ if (LHSI->getOpcode() == Instruction::LShr)
+ Comp = Comp.lshr(ShAmtVal);
+ else
+ Comp = Comp.ashr(ShAmtVal);
+
+ if (Comp != RHSV) { // Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ // Otherwise, check to see if the bits shifted out are known to be zero.
+ // If so, we can compare against the unshifted value:
+ // (X & 4) >> 1 == 2 --> (X & 4) == 4.
+ if (LHSI->hasOneUse() &&
+ MaskedValueIsZero(LHSI->getOperand(0),
+ APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getShl(RHS, ShAmt));
+ }
+
+ if (LHSI->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+ Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
+
+ Value *And = Builder->CreateAnd(LHSI->getOperand(0),
+ Mask, LHSI->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And,
+ ConstantExpr::getShl(RHS, ShAmt));
+ }
+ break;
+ }
+
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ // Fold: icmp pred ([us]div X, C1), C2 -> range test
+ // Fold this div into the comparison, producing a range check.
+ // Determine, based on the divide type, what the range is being
+ // checked. If there is an overflow on the low or high side, remember
+ // it, otherwise compute the range [low, hi) bounding the new value.
+ // See: InsertRangeTest above for the kinds of replacements possible.
+ if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
+ if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
+ DivRHS))
+ return R;
+ break;
+
+ case Instruction::Add:
+ // Fold: icmp pred (add X, C1), C2
+ if (!ICI.isEquality()) {
+ ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!LHSC) break;
+ const APInt &LHSV = LHSC->getValue();
+
+ ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
+ .subtract(LHSV);
+
+ if (ICI.isSigned()) {
+ if (CR.getLower().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getUpper()));
+ } else if (CR.getUpper().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getLower()));
+ }
+ } else {
+ if (CR.getLower().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getUpper()));
+ } else if (CR.getUpper().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getLower()));
+ }
+ }
+ }
+ break;
+ }
+
+ // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
+ if (ICI.isEquality()) {
+ bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+
+ // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
+ // the second operand is a constant, simplify a bit.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
+ switch (BO->getOpcode()) {
+ case Instruction::SRem:
+ // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
+ if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){
+ const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
+ if (V.sgt(1) && V.isPowerOf2()) {
+ Value *NewRem =
+ Builder->CreateURem(BO->getOperand(0), BO->getOperand(1),
+ BO->getName());
+ return new ICmpInst(ICI.getPredicate(), NewRem,
+ Constant::getNullValue(BO->getType()));
+ }
+ }
+ break;
+ case Instruction::Add:
+ // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
+ if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ if (BO->hasOneUse())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ ConstantExpr::getSub(RHS, BOp1C));
+ } else if (RHSV == 0) {
+ // Replace ((add A, B) != 0) with (A != -B) if A or B is
+ // efficiently invertible, or if the add has just this one use.
+ Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
+
+ if (Value *NegVal = dyn_castNegVal(BOp1))
+ return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
+ else if (Value *NegVal = dyn_castNegVal(BOp0))
+ return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
+ else if (BO->hasOneUse()) {
+ Value *Neg = Builder->CreateNeg(BOp1);
+ Neg->takeName(BO);
+ return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
+ }
+ }
+ break;
+ case Instruction::Xor:
+ // For the xor case, we can xor two constants together, eliminating
+ // the explicit xor.
+ if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ ConstantExpr::getXor(RHS, BOC));
+
+ // FALLTHROUGH
+ case Instruction::Sub:
+ // Replace (([sub|xor] A, B) != 0) with (A != B)
+ if (RHSV == 0)
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ BO->getOperand(1));
+ break;
+
+ case Instruction::Or:
+ // If bits are being or'd in that are not present in the constant we
+ // are comparing against, then the comparison could never succeed!
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ Constant *NotCI = ConstantExpr::getNot(RHS);
+ if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ isICMP_NE));
+ }
+ break;
+
+ case Instruction::And:
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ // If bits are being compared against that are and'd out, then the
+ // comparison can never succeed!
+ if ((RHSV & ~BOC->getValue()) != 0)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ isICMP_NE));
+
+ // If we have ((X & C) == C), turn it into ((X & C) != 0).
+ if (RHS == BOC && RHSV.isPowerOf2())
+ return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
+ ICmpInst::ICMP_NE, LHSI,
+ Constant::getNullValue(RHS->getType()));
+
+ // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
+ if (BOC->getValue().isSignBit()) {
+ Value *X = BO->getOperand(0);
+ Constant *Zero = Constant::getNullValue(X->getType());
+ ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+ return new ICmpInst(pred, X, Zero);
+ }
+
+ // ((X & ~7) == 0) --> X < 8
+ if (RHSV == 0 && isHighOnes(BOC)) {
+ Value *X = BO->getOperand(0);
+ Constant *NegX = ConstantExpr::getNeg(BOC);
+ ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+ return new ICmpInst(pred, X, NegX);
+ }
+ }
+ default: break;
+ }
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
+ // Handle icmp {eq|ne} <intrinsic>, intcst.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::bswap:
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap()));
+ return &ICI;
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ // ctz(A) == bitwidth(a) -> A == 0 and likewise for !=
+ if (RHSV == RHS->getType()->getBitWidth()) {
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0));
+ return &ICI;
+ }
+ break;
+ case Intrinsic::ctpop:
+ // popcount(A) == 0 -> A == 0 and likewise for !=
+ if (RHS->isZero()) {
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getArgOperand(0));
+ ICI.setOperand(1, RHS);
+ return &ICI;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
+/// We only handle extending casts so far.
+///
+Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
+ const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
+ Value *LHSCIOp = LHSCI->getOperand(0);
+ const Type *SrcTy = LHSCIOp->getType();
+ const Type *DestTy = LHSCI->getType();
+ Value *RHSCIOp;
+
+ // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
+ // integer type is the same size as the pointer type.
+ if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
+ TD->getPointerSizeInBits() ==
+ cast<IntegerType>(DestTy)->getBitWidth()) {
+ Value *RHSOp = 0;
+ if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+ RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
+ } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
+ RHSOp = RHSC->getOperand(0);
+ // If the pointer types don't match, insert a bitcast.
+ if (LHSCIOp->getType() != RHSOp->getType())
+ RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
+ }
+
+ if (RHSOp)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
+ }
+
+ // The code below only handles extension cast instructions, so far.
+ // Enforce this.
+ if (LHSCI->getOpcode() != Instruction::ZExt &&
+ LHSCI->getOpcode() != Instruction::SExt)
+ return 0;
+
+ bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
+ bool isSignedCmp = ICI.isSigned();
+
+ if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
+ // Not an extension from the same type?
+ RHSCIOp = CI->getOperand(0);
+ if (RHSCIOp->getType() != LHSCIOp->getType())
+ return 0;
+
+ // If the signedness of the two casts doesn't agree (i.e. one is a sext
+ // and the other is a zext), then we can't handle this.
+ if (CI->getOpcode() != LHSCI->getOpcode())
+ return 0;
+
+ // Deal with equality cases early.
+ if (ICI.isEquality())
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedCmp && isSignedExt)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
+ }
+
+ // If we aren't dealing with a constant on the RHS, exit early
+ ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
+ if (!CI)
+ return 0;
+
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DestTy.
+ Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(),
+ Res1, DestTy);
+
+ // If the re-extended constant didn't change...
+ if (Res2 == CI) {
+ // Deal with equality cases early.
+ if (ICI.isEquality())
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedExt && isSignedCmp)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1);
+ }
+
+ // The re-extended constant changed so the constant cannot be represented
+ // in the shorter type. Consequently, we cannot emit a simple comparison.
+
+ // First, handle some easy cases. We know the result cannot be equal at this
+ // point so handle the ICI.isEquality() cases
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+
+ // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
+ // should have been folded away previously and not enter in here.
+ Value *Result;
+ if (isSignedCmp) {
+ // We're performing a signed comparison.
+ if (cast<ConstantInt>(CI)->getValue().isNegative())
+ Result = ConstantInt::getFalse(ICI.getContext()); // X < (small) --> false
+ else
+ Result = ConstantInt::getTrue(ICI.getContext()); // X < (large) --> true
+ } else {
+ // We're performing an unsigned comparison.
+ if (isSignedExt) {
+ // We're performing an unsigned comp with a sign extended value.
+ // This is true if the input is >= 0. [aka >s -1]
+ Constant *NegOne = Constant::getAllOnesValue(SrcTy);
+ Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
+ } else {
+ // Unsigned extend & unsigned compare -> always true.
+ Result = ConstantInt::getTrue(ICI.getContext());
+ }
+ }
+
+ // Finally, return the value computed.
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT ||
+ ICI.getPredicate() == ICmpInst::ICMP_SLT)
+ return ReplaceInstUsesWith(ICI, Result);
+
+ assert((ICI.getPredicate()==ICmpInst::ICMP_UGT ||
+ ICI.getPredicate()==ICmpInst::ICMP_SGT) &&
+ "ICmp should be folded!");
+ if (Constant *CI = dyn_cast<Constant>(Result))
+ return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
+ return BinaryOperator::CreateNot(Result);
+}
+
+
+
+Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
+ bool Changed = false;
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts constants before unary operators,
+ /// before binary operators.
+ if (getComplexity(Op0) < getComplexity(Op1)) {
+ I.swapOperands();
+ std::swap(Op0, Op1);
+ Changed = true;
+ }
+
+ if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ const Type *Ty = Op0->getType();
+
+ // icmp's with boolean values can always be turned into bitwise operations
+ if (Ty->isIntegerTy(1)) {
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Invalid icmp instruction!");
+ case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B)
+ Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateNot(Xor);
+ }
+ case ICmpInst::ICMP_NE: // icmp eq i1 A, B -> A^B
+ return BinaryOperator::CreateXor(Op0, Op1);
+
+ case ICmpInst::ICMP_UGT:
+ std::swap(Op0, Op1); // Change icmp ugt -> icmp ult
+ // FALL THROUGH
+ case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B
+ Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
+ return BinaryOperator::CreateAnd(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGT:
+ std::swap(Op0, Op1); // Change icmp sgt -> icmp slt
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B
+ Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateAnd(Not, Op0);
+ }
+ case ICmpInst::ICMP_UGE:
+ std::swap(Op0, Op1); // Change icmp uge -> icmp ule
+ // FALL THROUGH
+ case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B
+ Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
+ return BinaryOperator::CreateOr(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGE:
+ std::swap(Op0, Op1); // Change icmp sge -> icmp sle
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B
+ Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ }
+ }
+
+ unsigned BitWidth = 0;
+ if (TD)
+ BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+ else if (Ty->isIntOrIntVectorTy())
+ BitWidth = Ty->getScalarSizeInBits();
+
+ bool isSignBit = false;
+
+ // See if we are doing a comparison with a constant.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ Value *A = 0, *B = 0;
+
+ // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
+ if (I.isEquality() && CI->isZero() &&
+ match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
+ // (icmp cond A B) if cond is equality
+ return new ICmpInst(I.getPredicate(), A, B);
+ }
+
+ // If we have an icmp le or icmp ge instruction, turn it into the
+ // appropriate icmp lt or icmp gt instruction. This allows us to rely on
+ // them being folded in the code below. The SimplifyICmpInst code has
+ // already handled the edge cases for us, so we just assert on them.
+ switch (I.getPredicate()) {
+ default: break;
+ case ICmpInst::ICMP_ULE:
+ assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ case ICmpInst::ICMP_SLE:
+ assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ case ICmpInst::ICMP_UGE:
+ assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ case ICmpInst::ICMP_SGE:
+ assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ }
+
+ // If this comparison is a normal comparison, it demands all
+ // bits, if it is a sign bit comparison, it only demands the sign bit.
+ bool UnusedBit;
+ isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
+ }
+
+ // See if we can fold the comparison based on range information we can get
+ // by checking whether bits are known to be zero or one in the input.
+ if (BitWidth != 0) {
+ APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
+ APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
+
+ if (SimplifyDemandedBits(I.getOperandUse(0),
+ isSignBit ? APInt::getSignBit(BitWidth)
+ : APInt::getAllOnesValue(BitWidth),
+ Op0KnownZero, Op0KnownOne, 0))
+ return &I;
+ if (SimplifyDemandedBits(I.getOperandUse(1),
+ APInt::getAllOnesValue(BitWidth),
+ Op1KnownZero, Op1KnownOne, 0))
+ return &I;
+
+ // Given the known and unknown bits, compute a range that the LHS could be
+ // in. Compute the Min, Max and RHS values based on the known bits. For the
+ // EQ and NE we use unsigned values.
+ APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
+ APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
+ if (I.isSigned()) {
+ ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+ Op0Min, Op0Max);
+ ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+ Op1Min, Op1Max);
+ } else {
+ ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+ Op0Min, Op0Max);
+ ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+ Op1Min, Op1Max);
+ }
+
+ // If Min and Max are known to be the same, then SimplifyDemandedBits
+ // figured out that the LHS is a constant. Just constant fold this now so
+ // that code below can assume that Min != Max.
+ if (!isa<Constant>(Op0) && Op0Min == Op0Max)
+ return new ICmpInst(I.getPredicate(),
+ ConstantInt::get(I.getContext(), Op0Min), Op1);
+ if (!isa<Constant>(Op1) && Op1Min == Op1Max)
+ return new ICmpInst(I.getPredicate(), Op0,
+ ConstantInt::get(I.getContext(), Op1Min));
+
+ // Based on the range information we know about the LHS, see if we can
+ // simplify this comparison. For example, (x&4) < 8 is always true.
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unknown icmp opcode!");
+ case ICmpInst::ICMP_EQ:
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ case ICmpInst::ICMP_NE:
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+
+ // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
+ if (CI->isMinValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+ Constant::getAllOnesValue(Op0->getType()));
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+
+ if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(a)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+
+ // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
+ if (CI->isMaxValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ Constant::getNullValue(Op0->getType()));
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+
+ if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ }
+ break;
+ case ICmpInst::ICMP_SGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
+ if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ case ICmpInst::ICMP_SLE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
+ if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ case ICmpInst::ICMP_UGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
+ if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ case ICmpInst::ICMP_ULE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
+ if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ }
+
+ // Turn a signed comparison into an unsigned one if both operands
+ // are known to have the same sign.
+ if (I.isSigned() &&
+ ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
+ (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
+ return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
+ }
+
+ // Test if the ICmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (I.hasOneUse())
+ if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin()))
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return 0;
+
+ // See if we are doing a comparison between a constant and an instruction that
+ // can be folded into the comparison.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ // Since the RHS is a ConstantInt (CI), if the left hand side is an
+ // instruction, see if that instruction also has constants so that the
+ // instruction can be folded into the icmp
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
+ return Res;
+ }
+
+ // Handle icmp with constant (but not simple integer constant) RHS
+ if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ switch (LHSI->getOpcode()) {
+ case Instruction::GetElementPtr:
+ // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
+ if (RHSC->isNullValue() &&
+ cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
+ return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
+ break;
+ case Instruction::PHI:
+ // Only fold icmp into the PHI if the phi and icmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I, true))
+ return NV;
+ break;
+ case Instruction::Select: {
+ // If either operand of the select is a constant, we can fold the
+ // comparison into the select arms, which will cause one to be
+ // constant folded and the select turned into a bitwise or.
+ Value *Op1 = 0, *Op2 = 0;
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1)))
+ Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2)))
+ Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+
+ // We only want to perform this transformation if it will not lead to
+ // additional code. This is true if either both sides of the select
+ // fold to a constant (in which case the icmp is replaced with a select
+ // which will usually simplify) or this is the only user of the
+ // select (in which case we are trading a select+icmp for a simpler
+ // select+icmp).
+ if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) {
+ if (!Op1)
+ Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
+ RHSC, I.getName());
+ if (!Op2)
+ Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
+ RHSC, I.getName());
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ }
+ break;
+ }
+ case Instruction::IntToPtr:
+ // icmp pred inttoptr(X), null -> icmp pred X, 0
+ if (RHSC->isNullValue() && TD &&
+ TD->getIntPtrType(RHSC->getContext()) ==
+ LHSI->getOperand(0)->getType())
+ return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
+ break;
+
+ case Instruction::Load:
+ // Try to optimize things like "A[i] > 4" to index computations.
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
+ }
+ }
+
+ // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
+ if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
+ return NI;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
+ if (Instruction *NI = FoldGEPICmp(GEP, Op0,
+ ICmpInst::getSwappedPredicate(I.getPredicate()), I))
+ return NI;
+
+ // Test to see if the operands of the icmp are casted versions of other
+ // values. If the ptr->ptr cast can be stripped off both arguments, we do so
+ // now.
+ if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
+ if (Op0->getType()->isPointerTy() &&
+ (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+ // We keep moving the cast from the left operand over to the right
+ // operand, where it can often be eliminated completely.
+ Op0 = CI->getOperand(0);
+
+ // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
+ // so eliminate it as well.
+ if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
+ Op1 = CI2->getOperand(0);
+
+ // If Op1 is a constant, we can fold the cast into the constant.
+ if (Op0->getType() != Op1->getType()) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
+ } else {
+ // Otherwise, cast the RHS right before the icmp
+ Op1 = Builder->CreateBitCast(Op1, Op0->getType());
+ }
+ }
+ return new ICmpInst(I.getPredicate(), Op0, Op1);
+ }
+ }
+
+ if (isa<CastInst>(Op0)) {
+ // Handle the special case of: icmp (cast bool to X), <cst>
+ // This comes up when you have code like
+ // int X = A < B;
+ // if (X) ...
+ // For generality, we handle any zero-extension of any operand comparison
+ // with a constant or another cast from the same type.
+ if (isa<Constant>(Op1) || isa<CastInst>(Op1))
+ if (Instruction *R = visitICmpInstWithCastAndCast(I))
+ return R;
+ }
+
+ // See if it's the same type of instruction on the left and right.
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
+ if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() &&
+ Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) {
+ switch (Op0I->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
+ return new ICmpInst(I.getPredicate(), Op0I->getOperand(0),
+ Op1I->getOperand(0));
+ // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ if (CI->getValue().isSignBit()) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ return new ICmpInst(Pred, Op0I->getOperand(0),
+ Op1I->getOperand(0));
+ }
+
+ if (CI->getValue().isMaxSignedValue()) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ Pred = I.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, Op0I->getOperand(0),
+ Op1I->getOperand(0));
+ }
+ }
+ break;
+ case Instruction::Mul:
+ if (!I.isEquality())
+ break;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+ // Mask = -1 >> count-trailing-zeros(Cst).
+ if (!CI->isZero() && !CI->isOne()) {
+ const APInt &AP = CI->getValue();
+ ConstantInt *Mask = ConstantInt::get(I.getContext(),
+ APInt::getLowBitsSet(AP.getBitWidth(),
+ AP.getBitWidth() -
+ AP.countTrailingZeros()));
+ Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask);
+ Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask);
+ return new ICmpInst(I.getPredicate(), And1, And2);
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ // ~x < ~y --> y < x
+ { Value *A, *B;
+ if (match(Op0, m_Not(m_Value(A))) &&
+ match(Op1, m_Not(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B, A);
+ }
+
+ if (I.isEquality()) {
+ Value *A, *B, *C, *D;
+
+ // -x == -y --> x == y
+ if (match(Op0, m_Neg(m_Value(A))) &&
+ match(Op1, m_Neg(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), A, B);
+
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
+ Value *OtherVal = A == Op1 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ }
+
+ if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
+ // A^c1 == C^c2 --> A == C^(c1^c2)
+ ConstantInt *C1, *C2;
+ if (match(B, m_ConstantInt(C1)) &&
+ match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
+ Constant *NC = ConstantInt::get(I.getContext(),
+ C1->getValue() ^ C2->getValue());
+ Value *Xor = Builder->CreateXor(C, NC, "tmp");
+ return new ICmpInst(I.getPredicate(), A, Xor);
+ }
+
+ // A^B == A^D -> B == D
+ if (A == C) return new ICmpInst(I.getPredicate(), B, D);
+ if (A == D) return new ICmpInst(I.getPredicate(), B, C);
+ if (B == C) return new ICmpInst(I.getPredicate(), A, D);
+ if (B == D) return new ICmpInst(I.getPredicate(), A, C);
+ }
+ }
+
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0)) {
+ // A == (A^B) -> B == 0
+ Value *OtherVal = A == Op0 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ }
+
+ // (A-B) == A -> B == 0
+ if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B,
+ Constant::getNullValue(B->getType()));
+
+ // A == (A-B) -> B == 0
+ if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B,
+ Constant::getNullValue(B->getType()));
+
+ // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
+ if (Op0->hasOneUse() && Op1->hasOneUse() &&
+ match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_And(m_Value(C), m_Value(D)))) {
+ Value *X = 0, *Y = 0, *Z = 0;
+
+ if (A == C) {
+ X = B; Y = D; Z = A;
+ } else if (A == D) {
+ X = B; Y = C; Z = A;
+ } else if (B == C) {
+ X = A; Y = D; Z = B;
+ } else if (B == D) {
+ X = A; Y = C; Z = B;
+ }
+
+ if (X) { // Build (X^Y) & Z
+ Op1 = Builder->CreateXor(X, Y, "tmp");
+ Op1 = Builder->CreateAnd(Op1, Z, "tmp");
+ I.setOperand(0, Op1);
+ I.setOperand(1, Constant::getNullValue(Op1->getType()));
+ return &I;
+ }
+ }
+ }
+
+ {
+ Value *X; ConstantInt *Cst;
+ // icmp X+Cst, X
+ if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
+ return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0);
+
+ // icmp X, X+Cst
+ if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
+ return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1);
+ }
+ return Changed ? &I : 0;
+}
+
+
+
+
+
+
+/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
+///
+Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
+ Instruction *LHSI,
+ Constant *RHSC) {
+ if (!isa<ConstantFP>(RHSC)) return 0;
+ const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
+
+ // Get the width of the mantissa. We don't want to hack on conversions that
+ // might lose information from the integer, e.g. "i64 -> float"
+ int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
+ if (MantissaWidth == -1) return 0; // Unknown.
+
+ // Check to see that the input is converted from an integer type that is small
+ // enough that preserves all bits. TODO: check here for "known" sign bits.
+ // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
+ unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();
+
+ // If this is a uitofp instruction, we need an extra bit to hold the sign.
+ bool LHSUnsigned = isa<UIToFPInst>(LHSI);
+ if (LHSUnsigned)
+ ++InputSize;
+
+ // If the conversion would lose info, don't hack on this.
+ if ((int)InputSize > MantissaWidth)
+ return 0;
+
+ // Otherwise, we can potentially simplify the comparison. We know that it
+ // will always come through as an integer value and we know the constant is
+ // not a NAN (it would have been previously simplified).
+ assert(!RHS.isNaN() && "NaN comparison not already folded!");
+
+ ICmpInst::Predicate Pred;
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unexpected predicate!");
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_OEQ:
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_OGT:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
+ break;
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGE:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
+ break;
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_OLT:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
+ break;
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLE:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
+ break;
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ONE:
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case FCmpInst::FCMP_ORD:
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ case FCmpInst::FCMP_UNO:
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+
+ const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
+
+ // Now we know that the APFloat is a normal number, zero or inf.
+
+ // See if the FP constant is too large for the integer. For example,
+ // comparing an i8 to 300.0.
+ unsigned IntWidth = IntTy->getScalarSizeInBits();
+
+ if (!LHSUnsigned) {
+ // If the RHS value is > SignedMax, fold the comparison. This handles +INF
+ // and large values.
+ APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false);
+ SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
+ Pred == ICmpInst::ICMP_SLE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ } else {
+ // If the RHS value is > UnsignedMax, fold the comparison. This handles
+ // +INF and large values.
+ APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false);
+ UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
+ APFloat::rmNearestTiesToEven);
+ if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
+ Pred == ICmpInst::ICMP_ULE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ }
+
+ if (!LHSUnsigned) {
+ // See if the RHS value is < SignedMin.
+ APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+ SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
+ Pred == ICmpInst::ICMP_SGE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ }
+
+ // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
+ // [0, UMAX], but it may still be fractional. See if it is fractional by
+ // casting the FP value to the integer value and back, checking for equality.
+ // Don't do this for zero, because -0.0 is not fractional.
+ Constant *RHSInt = LHSUnsigned
+ ? ConstantExpr::getFPToUI(RHSC, IntTy)
+ : ConstantExpr::getFPToSI(RHSC, IntTy);
+ if (!RHS.isZero()) {
+ bool Equal = LHSUnsigned
+ ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
+ : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
+ if (!Equal) {
+ // If we had a comparison against a fractional value, we have to adjust
+ // the compare predicate and sometimes the value. RHSC is rounded towards
+ // zero at this point.
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected integer comparison!");
+ case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ case ICmpInst::ICMP_ULE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> false
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ case ICmpInst::ICMP_SLE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> int < -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLT;
+ break;
+ case ICmpInst::ICMP_ULT:
+ // (float)int < -4.4 --> false
+ // (float)int < 4.4 --> int <= 4
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ Pred = ICmpInst::ICMP_ULE;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // (float)int < -4.4 --> int < -4
+ // (float)int < 4.4 --> int <= 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLE;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> true
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ break;
+ case ICmpInst::ICMP_SGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> int >= -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGE;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // (float)int >= -4.4 --> true
+ // (float)int >= 4.4 --> int > 4
+ if (!RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ Pred = ICmpInst::ICMP_UGT;
+ break;
+ case ICmpInst::ICMP_SGE:
+ // (float)int >= -4.4 --> int >= -4
+ // (float)int >= 4.4 --> int > 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGT;
+ break;
+ }
+ }
+ }
+
+ // Lower this FP comparison into an appropriate integer version of the
+ // comparison.
+ return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
+}
+
+Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts constants before unary operators,
+ /// before binary operators.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Simplify 'fcmp pred X, X'
+ if (Op0 == Op1) {
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unknown predicate!");
+ case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y)
+ case FCmpInst::FCMP_ULT: // True if unordered or less than
+ case FCmpInst::FCMP_UGT: // True if unordered or greater than
+ case FCmpInst::FCMP_UNE: // True if unordered or not equal
+ // Canonicalize these to be 'fcmp uno %X, 0.0'.
+ I.setPredicate(FCmpInst::FCMP_UNO);
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ return &I;
+
+ case FCmpInst::FCMP_ORD: // True if ordered (no nans)
+ case FCmpInst::FCMP_OEQ: // True if ordered and equal
+ case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal
+ case FCmpInst::FCMP_OLE: // True if ordered and less than or equal
+ // Canonicalize these to be 'fcmp ord %X, 0.0'.
+ I.setPredicate(FCmpInst::FCMP_ORD);
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ return &I;
+ }
+ }
+
+ // Handle fcmp with constant RHS
+ if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ switch (LHSI->getOpcode()) {
+ case Instruction::PHI:
+ // Only fold fcmp into the PHI if the phi and fcmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I, true))
+ return NV;
+ break;
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
+ return NV;
+ break;
+ case Instruction::Select: {
+ // If either operand of the select is a constant, we can fold the
+ // comparison into the select arms, which will cause one to be
+ // constant folded and the select turned into a bitwise or.
+ Value *Op1 = 0, *Op2 = 0;
+ if (LHSI->hasOneUse()) {
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
+ // Fold the known value into the constant operand.
+ Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+ // Insert a new FCmp of the other select operand.
+ Op2 = Builder->CreateFCmp(I.getPredicate(),
+ LHSI->getOperand(2), RHSC, I.getName());
+ } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
+ // Fold the known value into the constant operand.
+ Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+ // Insert a new FCmp of the other select operand.
+ Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1),
+ RHSC, I.getName());
+ }
+ }
+
+ if (Op1)
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ break;
+ }
+ case Instruction::Load:
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
+ }
+ }
+
+ return Changed ? &I : 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
new file mode 100644
index 0000000..b68fbc2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -0,0 +1,639 @@
+//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for load, store and alloca.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+ // Ensure that the alloca array size argument has type intptr_t, so that
+ // any casting is exposed early.
+ if (TD) {
+ const Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
+ if (AI.getArraySize()->getType() != IntPtrTy) {
+ Value *V = Builder->CreateIntCast(AI.getArraySize(),
+ IntPtrTy, false);
+ AI.setOperand(0, V);
+ return &AI;
+ }
+ }
+
+ // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
+ if (AI.isArrayAllocation()) { // Check C != 1
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+ const Type *NewTy =
+ ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+ assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
+ AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
+ New->setAlignment(AI.getAlignment());
+
+ // Scan to the end of the allocation instructions, to skip over a block of
+ // allocas if possible...also skip interleaved debug info
+ //
+ BasicBlock::iterator It = New;
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
+
+ // Now that I is pointing to the first non-allocation-inst in the block,
+ // insert our getelementptr instruction...
+ //
+ Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
+ Value *Idx[2];
+ Idx[0] = NullIdx;
+ Idx[1] = NullIdx;
+ Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
+ New->getName()+".sub", It);
+
+ // Now make everything use the getelementptr instead of the original
+ // allocation.
+ return ReplaceInstUsesWith(AI, V);
+ } else if (isa<UndefValue>(AI.getArraySize())) {
+ return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+ }
+ }
+
+ if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
+ // If alloca'ing a zero byte object, replace the alloca with a null pointer.
+ // Note that we only do this for alloca's, because malloc should allocate
+ // and return a unique pointer, even for a zero byte allocation.
+ if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
+ return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+
+ // If the alignment is 0 (unspecified), assign it the preferred alignment.
+ if (AI.getAlignment() == 0)
+ AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
+ }
+
+ return 0;
+}
+
+
+/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
+static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
+ const TargetData *TD) {
+ User *CI = cast<User>(LI.getOperand(0));
+ Value *CastOp = CI->getOperand(0);
+
+ const PointerType *DestTy = cast<PointerType>(CI->getType());
+ const Type *DestPTy = DestTy->getElementType();
+ if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
+
+ // If the address spaces don't match, don't eliminate the cast.
+ if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
+ return 0;
+
+ const Type *SrcPTy = SrcTy->getElementType();
+
+ if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
+ DestPTy->isVectorTy()) {
+ // If the source is an array, the code below will not succeed. Check to
+ // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
+ // constants.
+ if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
+ if (Constant *CSrc = dyn_cast<Constant>(CastOp))
+ if (ASrcTy->getNumElements() != 0) {
+ Value *Idxs[2];
+ Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
+ Idxs[1] = Idxs[0];
+ CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
+ SrcTy = cast<PointerType>(CastOp->getType());
+ SrcPTy = SrcTy->getElementType();
+ }
+
+ if (IC.getTargetData() &&
+ (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
+ SrcPTy->isVectorTy()) &&
+ // Do not allow turning this into a load of an integer, which is then
+ // casted to a pointer, this pessimizes pointer analysis a lot.
+ (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
+ IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
+ IC.getTargetData()->getTypeSizeInBits(DestPTy)) {
+
+ // Okay, we are casting from one integer or pointer type to another of
+ // the same size. Instead of casting the pointer before the load, cast
+ // the result of the loaded value.
+ LoadInst *NewLoad =
+ IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
+ NewLoad->setAlignment(LI.getAlignment());
+ // Now cast the result of the load.
+ return new BitCastInst(NewLoad, LI.getType());
+ }
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
+ Value *Op = LI.getOperand(0);
+
+ // Attempt to improve the alignment.
+ if (TD) {
+ unsigned KnownAlign =
+ GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
+ unsigned LoadAlign = LI.getAlignment();
+ unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
+ TD->getABITypeAlignment(LI.getType());
+
+ if (KnownAlign > EffectiveLoadAlign)
+ LI.setAlignment(KnownAlign);
+ else if (LoadAlign == 0)
+ LI.setAlignment(EffectiveLoadAlign);
+ }
+
+ // load (cast X) --> cast (load X) iff safe.
+ if (isa<CastInst>(Op))
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ return Res;
+
+ // None of the following transforms are legal for volatile loads.
+ if (LI.isVolatile()) return 0;
+
+ // Do really simple store-to-load forwarding and load CSE, to catch cases
+ // where there are several consequtive memory accesses to the same location,
+ // separated by a few arithmetic operations.
+ BasicBlock::iterator BBI = &LI;
+ if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
+ return ReplaceInstUsesWith(LI, AvailableVal);
+
+ // load(gep null, ...) -> unreachable
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+ const Value *GEPI0 = GEPI->getOperand(0);
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
+ // Insert a new store to null instruction before the load to indicate
+ // that this code is not reachable. We do this instead of inserting
+ // an unreachable instruction directly because we cannot modify the
+ // CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+ }
+
+ // load null/undef -> unreachable
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<UndefValue>(Op) ||
+ (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
+ // Insert a new store to null instruction before the load to indicate that
+ // this code is not reachable. We do this instead of inserting an
+ // unreachable instruction directly because we cannot modify the CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+
+ // Instcombine load (constantexpr_cast global) -> cast (load global)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
+ if (CE->isCast())
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ return Res;
+
+ if (Op->hasOneUse()) {
+ // Change select and PHI nodes to select values instead of addresses: this
+ // helps alias analysis out a lot, allows many others simplifications, and
+ // exposes redundancy in the code.
+ //
+ // Note that we cannot do the transformation unless we know that the
+ // introduced loads cannot trap! Something like this is valid as long as
+ // the condition is always false: load (select bool %C, int* null, int* %G),
+ // but it would not be valid if we transformed it to load from null
+ // unconditionally.
+ //
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
+ // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
+ unsigned Align = LI.getAlignment();
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) {
+ LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
+ SI->getOperand(1)->getName()+".val");
+ LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
+ SI->getOperand(2)->getName()+".val");
+ V1->setAlignment(Align);
+ V2->setAlignment(Align);
+ return SelectInst::Create(SI->getCondition(), V1, V2);
+ }
+
+ // load (select (cond, null, P)) -> load P
+ if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
+ if (C->isNullValue()) {
+ LI.setOperand(0, SI->getOperand(2));
+ return &LI;
+ }
+
+ // load (select (cond, P, null)) -> load P
+ if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
+ if (C->isNullValue()) {
+ LI.setOperand(0, SI->getOperand(1));
+ return &LI;
+ }
+ }
+ }
+ return 0;
+}
+
+/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
+/// when possible. This makes it generally easy to do alias analysis and/or
+/// SROA/mem2reg of the memory object.
+static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
+ User *CI = cast<User>(SI.getOperand(1));
+ Value *CastOp = CI->getOperand(0);
+
+ const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
+ const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
+ if (SrcTy == 0) return 0;
+
+ const Type *SrcPTy = SrcTy->getElementType();
+
+ if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
+ return 0;
+
+ /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
+ /// to its first element. This allows us to handle things like:
+ /// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
+ /// on 32-bit hosts.
+ SmallVector<Value*, 4> NewGEPIndices;
+
+ // If the source is an array, the code below will not succeed. Check to
+ // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
+ // constants.
+ if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
+ // Index through pointer.
+ Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
+ NewGEPIndices.push_back(Zero);
+
+ while (1) {
+ if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) {
+ if (!STy->getNumElements()) /* Struct can be empty {} */
+ break;
+ NewGEPIndices.push_back(Zero);
+ SrcPTy = STy->getElementType(0);
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
+ NewGEPIndices.push_back(Zero);
+ SrcPTy = ATy->getElementType();
+ } else {
+ break;
+ }
+ }
+
+ SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
+ }
+
+ if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
+ return 0;
+
+ // If the pointers point into different address spaces or if they point to
+ // values with different sizes, we can't do the transformation.
+ if (!IC.getTargetData() ||
+ SrcTy->getAddressSpace() !=
+ cast<PointerType>(CI->getType())->getAddressSpace() ||
+ IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
+ IC.getTargetData()->getTypeSizeInBits(DestPTy))
+ return 0;
+
+ // Okay, we are casting from one integer or pointer type to another of
+ // the same size. Instead of casting the pointer before
+ // the store, cast the value to be stored.
+ Value *NewCast;
+ Value *SIOp0 = SI.getOperand(0);
+ Instruction::CastOps opcode = Instruction::BitCast;
+ const Type* CastSrcTy = SIOp0->getType();
+ const Type* CastDstTy = SrcPTy;
+ if (CastDstTy->isPointerTy()) {
+ if (CastSrcTy->isIntegerTy())
+ opcode = Instruction::IntToPtr;
+ } else if (CastDstTy->isIntegerTy()) {
+ if (SIOp0->getType()->isPointerTy())
+ opcode = Instruction::PtrToInt;
+ }
+
+ // SIOp0 is a pointer to aggregate and this is a store to the first field,
+ // emit a GEP to index into its first field.
+ if (!NewGEPIndices.empty())
+ CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
+ NewGEPIndices.end());
+
+ NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
+ SIOp0->getName()+".c");
+ return new StoreInst(NewCast, CastOp);
+}
+
+/// equivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+/// %t0 = getelementptr \@a, 0, 3
+/// store i32 0, i32* %t0
+/// %t1 = getelementptr \@a, 0, 3
+/// %t2 = load i32* %t1
+///
+static bool equivalentAddressValues(Value *A, Value *B) {
+ // Test if the values are trivially equivalent.
+ if (A == B) return true;
+
+ // Test if the values come form identical arithmetic instructions.
+ // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
+ // its only used to compare two uses within the same basic block, which
+ // means that they'll always either have the same value or one of them
+ // will have an undefined value.
+ if (isa<BinaryOperator>(A) ||
+ isa<CastInst>(A) ||
+ isa<PHINode>(A) ||
+ isa<GetElementPtrInst>(A))
+ if (Instruction *BI = dyn_cast<Instruction>(B))
+ if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+ return true;
+
+ // Otherwise they may not be equivalent.
+ return false;
+}
+
+// If this instruction has two uses, one of which is a llvm.dbg.declare,
+// return the llvm.dbg.declare.
+DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
+ if (!V->hasNUses(2))
+ return 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U))
+ return DI;
+ if (isa<BitCastInst>(U) && U->hasOneUse()) {
+ if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(*U->use_begin()))
+ return DI;
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
+ Value *Val = SI.getOperand(0);
+ Value *Ptr = SI.getOperand(1);
+
+ // If the RHS is an alloca with a single use, zapify the store, making the
+ // alloca dead.
+ // If the RHS is an alloca with a two uses, the other one being a
+ // llvm.dbg.declare, zapify the store and the declare, making the
+ // alloca dead. We must do this to prevent declares from affecting
+ // codegen.
+ if (!SI.isVolatile()) {
+ if (Ptr->hasOneUse()) {
+ if (isa<AllocaInst>(Ptr))
+ return EraseInstFromFunction(SI);
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ if (isa<AllocaInst>(GEP->getOperand(0))) {
+ if (GEP->getOperand(0)->hasOneUse())
+ return EraseInstFromFunction(SI);
+ if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
+ EraseInstFromFunction(*DI);
+ return EraseInstFromFunction(SI);
+ }
+ }
+ }
+ }
+ if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
+ EraseInstFromFunction(*DI);
+ return EraseInstFromFunction(SI);
+ }
+ }
+
+ // Attempt to improve the alignment.
+ if (TD) {
+ unsigned KnownAlign =
+ GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
+ unsigned StoreAlign = SI.getAlignment();
+ unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
+ TD->getABITypeAlignment(Val->getType());
+
+ if (KnownAlign > EffectiveStoreAlign)
+ SI.setAlignment(KnownAlign);
+ else if (StoreAlign == 0)
+ SI.setAlignment(EffectiveStoreAlign);
+ }
+
+ // Do really simple DSE, to catch cases where there are several consecutive
+ // stores to the same location, separated by a few arithmetic operations. This
+ // situation often occurs with bitfield accesses.
+ BasicBlock::iterator BBI = &SI;
+ for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
+ --ScanInsts) {
+ --BBI;
+ // Don't count debug info directives, lest they affect codegen,
+ // and we skip pointer-to-pointer bitcasts, which are NOPs.
+ if (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
+ ScanInsts++;
+ continue;
+ }
+
+ if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
+ // Prev store isn't volatile, and stores to the same location?
+ if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1),
+ SI.getOperand(1))) {
+ ++NumDeadStore;
+ ++BBI;
+ EraseInstFromFunction(*PrevSI);
+ continue;
+ }
+ break;
+ }
+
+ // If this is a load, we have to stop. However, if the loaded value is from
+ // the pointer we're loading and is producing the pointer we're storing,
+ // then *this* store is dead (X = load P; store X -> P).
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
+ !SI.isVolatile())
+ return EraseInstFromFunction(SI);
+
+ // Otherwise, this is a load from some other location. Stores before it
+ // may not be dead.
+ break;
+ }
+
+ // Don't skip over loads or things that can modify memory.
+ if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
+ break;
+ }
+
+
+ if (SI.isVolatile()) return 0; // Don't hack volatile stores.
+
+ // store X, null -> turns into 'unreachable' in SimplifyCFG
+ if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
+ if (!isa<UndefValue>(Val)) {
+ SI.setOperand(0, UndefValue::get(Val->getType()));
+ if (Instruction *U = dyn_cast<Instruction>(Val))
+ Worklist.Add(U); // Dropped a use.
+ }
+ return 0; // Do not modify these!
+ }
+
+ // store undef, Ptr -> noop
+ if (isa<UndefValue>(Val))
+ return EraseInstFromFunction(SI);
+
+ // If the pointer destination is a cast, see if we can fold the cast into the
+ // source instead.
+ if (isa<CastInst>(Ptr))
+ if (Instruction *Res = InstCombineStoreToCast(*this, SI))
+ return Res;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->isCast())
+ if (Instruction *Res = InstCombineStoreToCast(*this, SI))
+ return Res;
+
+
+ // If this store is the last instruction in the basic block (possibly
+ // excepting debug info instructions), and if the block ends with an
+ // unconditional branch, try to move it to the successor block.
+ BBI = &SI;
+ do {
+ ++BBI;
+ } while (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
+ if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
+ if (BI->isUnconditional())
+ if (SimplifyStoreAtEndOfBlock(SI))
+ return 0; // xform done!
+
+ return 0;
+}
+
+/// SimplifyStoreAtEndOfBlock - Turn things like:
+/// if () { *P = v1; } else { *P = v2 }
+/// into a phi node with a store in the successor.
+///
+/// Simplify things like:
+/// *P = v1; if () { *P = v2; }
+/// into a phi node with a store in the successor.
+///
+bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
+ BasicBlock *StoreBB = SI.getParent();
+
+ // Check to see if the successor block has exactly two incoming edges. If
+ // so, see if the other predecessor contains a store to the same location.
+ // if so, insert a PHI node (if needed) and move the stores down.
+ BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
+
+ // Determine whether Dest has exactly two predecessors and, if so, compute
+ // the other predecessor.
+ pred_iterator PI = pred_begin(DestBB);
+ BasicBlock *P = *PI;
+ BasicBlock *OtherBB = 0;
+
+ if (P != StoreBB)
+ OtherBB = P;
+
+ if (++PI == pred_end(DestBB))
+ return false;
+
+ P = *PI;
+ if (P != StoreBB) {
+ if (OtherBB)
+ return false;
+ OtherBB = P;
+ }
+ if (++PI != pred_end(DestBB))
+ return false;
+
+ // Bail out if all the relevant blocks aren't distinct (this can happen,
+ // for example, if SI is in an infinite loop)
+ if (StoreBB == DestBB || OtherBB == DestBB)
+ return false;
+
+ // Verify that the other block ends in a branch and is not otherwise empty.
+ BasicBlock::iterator BBI = OtherBB->getTerminator();
+ BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
+ if (!OtherBr || BBI == OtherBB->begin())
+ return false;
+
+ // If the other block ends in an unconditional branch, check for the 'if then
+ // else' case. there is an instruction before the branch.
+ StoreInst *OtherStore = 0;
+ if (OtherBr->isUnconditional()) {
+ --BBI;
+ // Skip over debugging info.
+ while (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
+ if (BBI==OtherBB->begin())
+ return false;
+ --BBI;
+ }
+ // If this isn't a store, isn't a store to the same location, or if the
+ // alignments differ, bail out.
+ OtherStore = dyn_cast<StoreInst>(BBI);
+ if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
+ OtherStore->getAlignment() != SI.getAlignment())
+ return false;
+ } else {
+ // Otherwise, the other block ended with a conditional branch. If one of the
+ // destinations is StoreBB, then we have the if/then case.
+ if (OtherBr->getSuccessor(0) != StoreBB &&
+ OtherBr->getSuccessor(1) != StoreBB)
+ return false;
+
+ // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
+ // if/then triangle. See if there is a store to the same ptr as SI that
+ // lives in OtherBB.
+ for (;; --BBI) {
+ // Check to see if we find the matching store.
+ if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
+ if (OtherStore->getOperand(1) != SI.getOperand(1) ||
+ OtherStore->getAlignment() != SI.getAlignment())
+ return false;
+ break;
+ }
+ // If we find something that may be using or overwriting the stored
+ // value, or if we run out of instructions, we can't do the xform.
+ if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
+ BBI == OtherBB->begin())
+ return false;
+ }
+
+ // In order to eliminate the store in OtherBr, we have to
+ // make sure nothing reads or overwrites the stored value in
+ // StoreBB.
+ for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
+ // FIXME: This should really be AA driven.
+ if (I->mayReadFromMemory() || I->mayWriteToMemory())
+ return false;
+ }
+ }
+
+ // Insert a PHI node now if we need it.
+ Value *MergedVal = OtherStore->getOperand(0);
+ if (MergedVal != SI.getOperand(0)) {
+ PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
+ PN->reserveOperandSpace(2);
+ PN->addIncoming(SI.getOperand(0), SI.getParent());
+ PN->addIncoming(OtherStore->getOperand(0), OtherBB);
+ MergedVal = InsertNewInstBefore(PN, DestBB->front());
+ }
+
+ // Advance to a place where it is safe to insert the new store and
+ // insert it.
+ BBI = DestBB->getFirstNonPHI();
+ InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
+ OtherStore->isVolatile(),
+ SI.getAlignment()), *BBI);
+
+ // Nuke the old stores.
+ EraseInstFromFunction(SI);
+ EraseInstFromFunction(*OtherStore);
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
new file mode 100644
index 0000000..b3974e8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -0,0 +1,695 @@
+//===- InstCombineMulDivRem.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for mul, fmul, sdiv, udiv, fdiv,
+// srem, urem, frem.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// SubOne - Subtract one from a ConstantInt.
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue()-1);
+}
+
+/// MultiplyOverflows - True if the multiply can not be expressed in an int
+/// this size.
+static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
+ uint32_t W = C1->getBitWidth();
+ APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
+ if (sign) {
+ LHSExt.sext(W * 2);
+ RHSExt.sext(W * 2);
+ } else {
+ LHSExt.zext(W * 2);
+ RHSExt.zext(W * 2);
+ }
+
+ APInt MulExt = LHSExt * RHSExt;
+
+ if (!sign)
+ return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
+
+ APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
+ APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
+ return MulExt.slt(Min) || MulExt.sgt(Max);
+}
+
+Instruction *InstCombiner::visitMul(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (isa<UndefValue>(Op1)) // undef * X -> 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // Simplify mul instructions with a constant RHS.
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) {
+
+ // ((X << C1)*C2) == (X * (C2 << C1))
+ if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
+ if (SI->getOpcode() == Instruction::Shl)
+ if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
+ return BinaryOperator::CreateMul(SI->getOperand(0),
+ ConstantExpr::getShl(CI, ShOp));
+
+ if (CI->isZero())
+ return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0
+ if (CI->equalsInt(1)) // X * 1 == X
+ return ReplaceInstUsesWith(I, Op0);
+ if (CI->isAllOnesValue()) // X * -1 == 0 - X
+ return BinaryOperator::CreateNeg(Op0, I.getName());
+
+ const APInt& Val = cast<ConstantInt>(CI)->getValue();
+ if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
+ return BinaryOperator::CreateShl(Op0,
+ ConstantInt::get(Op0->getType(), Val.logBase2()));
+ }
+ } else if (Op1C->getType()->isVectorTy()) {
+ if (Op1C->isNullValue())
+ return ReplaceInstUsesWith(I, Op1C);
+
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
+ if (Op1V->isAllOnesValue()) // X * -1 == 0 - X
+ return BinaryOperator::CreateNeg(Op0, I.getName());
+
+ // As above, vector X*splat(1.0) -> X in all defined cases.
+ if (Constant *Splat = Op1V->getSplatValue()) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat))
+ if (CI->equalsInt(1))
+ return ReplaceInstUsesWith(I, Op0);
+ }
+ }
+ }
+
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
+ if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
+ isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) {
+ // Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
+ Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp");
+ Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1));
+ return BinaryOperator::CreateAdd(Add, C1C2);
+
+ }
+
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castNegVal(Op1))
+ return BinaryOperator::CreateMul(Op0v, Op1v);
+
+ // (X / Y) * Y = X - (X % Y)
+ // (X / Y) * -Y = (X % Y) - X
+ {
+ Value *Op1C = Op1;
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
+ if (!BO ||
+ (BO->getOpcode() != Instruction::UDiv &&
+ BO->getOpcode() != Instruction::SDiv)) {
+ Op1C = Op0;
+ BO = dyn_cast<BinaryOperator>(Op1);
+ }
+ Value *Neg = dyn_castNegVal(Op1C);
+ if (BO && BO->hasOneUse() &&
+ (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&
+ (BO->getOpcode() == Instruction::UDiv ||
+ BO->getOpcode() == Instruction::SDiv)) {
+ Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
+
+ // If the division is exact, X % Y is zero.
+ if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO))
+ if (SDiv->isExact()) {
+ if (Op1BO == Op1C)
+ return ReplaceInstUsesWith(I, Op0BO);
+ return BinaryOperator::CreateNeg(Op0BO);
+ }
+
+ Value *Rem;
+ if (BO->getOpcode() == Instruction::UDiv)
+ Rem = Builder->CreateURem(Op0BO, Op1BO);
+ else
+ Rem = Builder->CreateSRem(Op0BO, Op1BO);
+ Rem->takeName(BO);
+
+ if (Op1BO == Op1C)
+ return BinaryOperator::CreateSub(Op0BO, Rem);
+ return BinaryOperator::CreateSub(Rem, Op0BO);
+ }
+ }
+
+ /// i1 mul -> i1 and.
+ if (I.getType()->isIntegerTy(1))
+ return BinaryOperator::CreateAnd(Op0, Op1);
+
+ // X*(1 << Y) --> X << Y
+ // (1 << Y)*X --> X << Y
+ {
+ Value *Y;
+ if (match(Op0, m_Shl(m_One(), m_Value(Y))))
+ return BinaryOperator::CreateShl(Op1, Y);
+ if (match(Op1, m_Shl(m_One(), m_Value(Y))))
+ return BinaryOperator::CreateShl(Op0, Y);
+ }
+
+ // If one of the operands of the multiply is a cast from a boolean value, then
+ // we know the bool is either zero or one, so this is a 'masking' multiply.
+ // X * Y (where Y is 0 or 1) -> X & (0-Y)
+ if (!I.getType()->isVectorTy()) {
+ // -2 is "-1 << 1" so it is all bits set except the low one.
+ APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
+
+ Value *BoolCast = 0, *OtherOp = 0;
+ if (MaskedValueIsZero(Op0, Negative2))
+ BoolCast = Op0, OtherOp = Op1;
+ else if (MaskedValueIsZero(Op1, Negative2))
+ BoolCast = Op1, OtherOp = Op0;
+
+ if (BoolCast) {
+ Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
+ BoolCast, "tmp");
+ return BinaryOperator::CreateAnd(V, OtherOp);
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Simplify mul instructions with a constant RHS...
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {
+ // "In IEEE floating point, x*1 is not equivalent to x for nans. However,
+ // ANSI says we can drop signals, so we can do this anyway." (from GCC)
+ if (Op1F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0'
+ } else if (Op1C->getType()->isVectorTy()) {
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
+ // As above, vector X*splat(1.0) -> X in all defined cases.
+ if (Constant *Splat = Op1V->getSplatValue()) {
+ if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
+ if (F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0);
+ }
+ }
+ }
+
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castFNegVal(Op1))
+ return BinaryOperator::CreateFMul(Op0v, Op1v);
+
+ return Changed ? &I : 0;
+}
+
+/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
+/// instruction.
+bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
+ SelectInst *SI = cast<SelectInst>(I.getOperand(1));
+
+ // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
+ int NonNullOperand = -1;
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
+ if (ST->isNullValue())
+ NonNullOperand = 2;
+ // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
+ if (ST->isNullValue())
+ NonNullOperand = 1;
+
+ if (NonNullOperand == -1)
+ return false;
+
+ Value *SelectCond = SI->getOperand(0);
+
+ // Change the div/rem to use 'Y' instead of the select.
+ I.setOperand(1, SI->getOperand(NonNullOperand));
+
+ // Okay, we know we replace the operand of the div/rem with 'Y' with no
+ // problem. However, the select, or the condition of the select may have
+ // multiple uses. Based on our knowledge that the operand must be non-zero,
+ // propagate the known value for the select into other uses of it, and
+ // propagate a known value of the condition into its other users.
+
+ // If the select and condition only have a single use, don't bother with this,
+ // early exit.
+ if (SI->use_empty() && SelectCond->hasOneUse())
+ return true;
+
+ // Scan the current block backward, looking for other uses of SI.
+ BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
+
+ while (BBI != BBFront) {
+ --BBI;
+ // If we found a call to a function, we can't assume it will return, so
+ // information from below it cannot be propagated above it.
+ if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
+ break;
+
+ // Replace uses of the select or its condition with the known values.
+ for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
+ I != E; ++I) {
+ if (*I == SI) {
+ *I = SI->getOperand(NonNullOperand);
+ Worklist.Add(BBI);
+ } else if (*I == SelectCond) {
+ *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) :
+ ConstantInt::getFalse(BBI->getContext());
+ Worklist.Add(BBI);
+ }
+ }
+
+ // If we past the instruction, quit looking for it.
+ if (&*BBI == SI)
+ SI = 0;
+ if (&*BBI == SelectCond)
+ SelectCond = 0;
+
+ // If we ran out of things to eliminate, break out of the loop.
+ if (SelectCond == 0 && SI == 0)
+ break;
+
+ }
+ return true;
+}
+
+
+/// This function implements the transforms on div instructions that work
+/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is
+/// used by the visitors to those instructions.
+/// @brief Transforms common to all three div instructions
+Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // undef / X -> 0 for integer.
+ // undef / X -> undef for FP (the undef could be a snan).
+ if (isa<UndefValue>(Op0)) {
+ if (Op0->getType()->isFPOrFPVectorTy())
+ return ReplaceInstUsesWith(I, Op0);
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
+
+ // X / undef -> undef
+ if (isa<UndefValue>(Op1))
+ return ReplaceInstUsesWith(I, Op1);
+
+ return 0;
+}
+
+/// This function implements the transforms common to both integer division
+/// instructions (udiv and sdiv). It is called by the visitors to those integer
+/// division instructions.
+/// @brief Common integer divide transforms
+Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // (sdiv X, X) --> 1 (udiv X, X) --> 1
+ if (Op0 == Op1) {
+ if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
+ Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
+ std::vector<Constant*> Elts(Ty->getNumElements(), CI);
+ return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
+ }
+
+ Constant *CI = ConstantInt::get(I.getType(), 1);
+ return ReplaceInstUsesWith(I, CI);
+ }
+
+ if (Instruction *Common = commonDivTransforms(I))
+ return Common;
+
+ // Handle cases involving: [su]div X, (select Cond, Y, Z)
+ // This does not apply for fdiv.
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // div X, 1 == X
+ if (RHS->equalsInt(1))
+ return ReplaceInstUsesWith(I, Op0);
+
+ // (X / C1) / C2 -> X / (C1*C2)
+ if (Instruction *LHS = dyn_cast<Instruction>(Op0))
+ if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
+ if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
+ if (MultiplyOverflows(RHS, LHSRHS,
+ I.getOpcode()==Instruction::SDiv))
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ else
+ return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
+ ConstantExpr::getMul(RHS, LHSRHS));
+ }
+
+ if (!RHS->isZero()) { // avoid X udiv 0
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+ }
+
+ // 0 / X == 0, we don't need to preserve faults!
+ if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))
+ if (LHS->equalsInt(0))
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // It can't be division by zero, hence it must be division by one.
+ if (I.getType()->isIntegerTy(1))
+ return ReplaceInstUsesWith(I, Op0);
+
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
+ if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue()))
+ // div X, 1 == X
+ if (X->isOne())
+ return ReplaceInstUsesWith(I, Op0);
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
+ // X udiv 2^C -> X >> C
+ // Check to see if this is an unsigned division with an exact power of 2,
+ // if so, convert to a right shift.
+ if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2
+ return BinaryOperator::CreateLShr(Op0,
+ ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
+
+ // X udiv C, where C >= signbit
+ if (C->getValue().isNegative()) {
+ Value *IC = Builder->CreateICmpULT( Op0, C);
+ return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
+ ConstantInt::get(I.getType(), 1));
+ }
+ }
+
+ // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
+ if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) {
+ if (RHSI->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(RHSI->getOperand(0))) {
+ const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue();
+ if (C1.isPowerOf2()) {
+ Value *N = RHSI->getOperand(1);
+ const Type *NTy = N->getType();
+ if (uint32_t C2 = C1.logBase2())
+ N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp");
+ return BinaryOperator::CreateLShr(Op0, N);
+ }
+ }
+ }
+
+ // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
+ // where C1&C2 are powers of two.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
+ if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
+ const APInt &TVA = STO->getValue(), &FVA = SFO->getValue();
+ if (TVA.isPowerOf2() && FVA.isPowerOf2()) {
+ // Compute the shift amounts
+ uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
+ // Construct the "on true" case of the select
+ Constant *TC = ConstantInt::get(Op0->getType(), TSA);
+ Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t");
+
+ // Construct the "on false" case of the select
+ Constant *FC = ConstantInt::get(Op0->getType(), FSA);
+ Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f");
+
+ // construct the select instruction and return it.
+ return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName());
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // sdiv X, -1 == -X
+ if (RHS->isAllOnesValue())
+ return BinaryOperator::CreateNeg(Op0);
+
+ // sdiv X, C --> ashr X, log2(C)
+ if (cast<SDivOperator>(&I)->isExact() &&
+ RHS->getValue().isNonNegative() &&
+ RHS->getValue().isPowerOf2()) {
+ Value *ShAmt = llvm::ConstantInt::get(RHS->getType(),
+ RHS->getValue().exactLogBase2());
+ return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName());
+ }
+
+ // -X/C --> X/-C provided the negation doesn't overflow.
+ if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
+ if (isa<Constant>(Sub->getOperand(0)) &&
+ cast<Constant>(Sub->getOperand(0))->isNullValue() &&
+ Sub->hasNoSignedWrap())
+ return BinaryOperator::CreateSDiv(Sub->getOperand(1),
+ ConstantExpr::getNeg(RHS));
+ }
+
+ // If the sign bits of both operands are zero (i.e. we can prove they are
+ // unsigned inputs), turn this into a udiv.
+ if (I.getType()->isIntegerTy()) {
+ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+ if (MaskedValueIsZero(Op0, Mask)) {
+ if (MaskedValueIsZero(Op1, Mask)) {
+ // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ }
+ ConstantInt *ShiftedInt;
+ if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) &&
+ ShiftedInt->getValue().isPowerOf2()) {
+ // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
+ // Safe because the only negative value (1 << Y) can take on is
+ // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
+ // the sign bit set.
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
+ return commonDivTransforms(I);
+}
+
+/// This function implements the transforms on rem instructions that work
+/// regardless of the kind of rem instruction it is (urem, srem, or frem). It
+/// is used by the visitors to those instructions.
+/// @brief Transforms common to all three rem instructions
+Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (isa<UndefValue>(Op0)) { // undef % X -> 0
+ if (I.getType()->isFPOrFPVectorTy())
+ return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
+ if (isa<UndefValue>(Op1))
+ return ReplaceInstUsesWith(I, Op1); // X % undef -> undef
+
+ // Handle cases involving: rem X, (select Cond, Y, Z)
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ return 0;
+}
+
+/// This function implements the transforms common to both integer remainder
+/// instructions (urem and srem). It is called by the visitors to those integer
+/// remainder instructions.
+/// @brief Common integer remainder transforms
+Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Instruction *common = commonRemTransforms(I))
+ return common;
+
+ // 0 % X == 0 for integer, we don't need to preserve faults!
+ if (Constant *LHS = dyn_cast<Constant>(Op0))
+ if (LHS->isNullValue())
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // X % 0 == undef, we don't need to preserve faults!
+ if (RHS->equalsInt(0))
+ return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
+
+ if (RHS->equalsInt(1)) // X % 1 == 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ } else if (isa<PHINode>(Op0I)) {
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ // See if we can fold away this rem instruction.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitURem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Instruction *common = commonIRemTransforms(I))
+ return common;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // X urem C^2 -> X and C
+ // Check to see if this is an unsigned remainder with an exact power of 2,
+ // if so, convert to a bitwise and.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue().isPowerOf2())
+ return BinaryOperator::CreateAnd(Op0, SubOne(C));
+ }
+
+ if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
+ // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
+ if (RHSI->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(RHSI->getOperand(0))) {
+ if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
+ Constant *N1 = Constant::getAllOnesValue(I.getType());
+ Value *Add = Builder->CreateAdd(RHSI, N1, "tmp");
+ return BinaryOperator::CreateAnd(Op0, Add);
+ }
+ }
+ }
+
+ // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (and X, C1), (and X, C2)
+ // where C1&C2 are powers of two.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
+ if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
+ if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
+ // STO == 0 and SFO == 0 handled above.
+ if ((STO->getValue().isPowerOf2()) &&
+ (SFO->getValue().isPowerOf2())) {
+ Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO),
+ SI->getName()+".t");
+ Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO),
+ SI->getName()+".f");
+ return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Handle the integer rem common cases
+ if (Instruction *Common = commonIRemTransforms(I))
+ return Common;
+
+ if (Value *RHSNeg = dyn_castNegVal(Op1))
+ if (!isa<Constant>(RHSNeg) ||
+ (isa<ConstantInt>(RHSNeg) &&
+ cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {
+ // X % -Y -> X % Y
+ Worklist.AddValue(I.getOperand(1));
+ I.setOperand(1, RHSNeg);
+ return &I;
+ }
+
+ // If the sign bits of both operands are zero (i.e. we can prove they are
+ // unsigned inputs), turn this into a urem.
+ if (I.getType()->isIntegerTy()) {
+ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+ if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
+ // X srem Y -> X urem Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateURem(Op0, Op1, I.getName());
+ }
+ }
+
+ // If it's a constant vector, flip any negative values positive.
+ if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) {
+ unsigned VWidth = RHSV->getNumOperands();
+
+ bool hasNegative = false;
+ for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
+ if (RHS->getValue().isNegative())
+ hasNegative = true;
+
+ if (hasNegative) {
+ std::vector<Constant *> Elts(VWidth);
+ for (unsigned i = 0; i != VWidth; ++i) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
+ if (RHS->getValue().isNegative())
+ Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
+ else
+ Elts[i] = RHS;
+ }
+ }
+
+ Constant *NewRHSV = ConstantVector::get(Elts);
+ if (NewRHSV != RHSV) {
+ Worklist.AddValue(I.getOperand(1));
+ I.setOperand(1, NewRHSV);
+ return &I;
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
+ return commonRemTransforms(I);
+}
+
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
new file mode 100644
index 0000000..f7fc62f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -0,0 +1,844 @@
+//===- InstCombinePHI.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitPHINode function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
+/// and if a/b/c and the add's all have a single use, turn this into a phi
+/// and a single binop.
+Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
+ Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+ assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst));
+ unsigned Opc = FirstInst->getOpcode();
+ Value *LHSVal = FirstInst->getOperand(0);
+ Value *RHSVal = FirstInst->getOperand(1);
+
+ const Type *LHSType = LHSVal->getType();
+ const Type *RHSType = RHSVal->getType();
+
+ // Scan to see if all operands are the same opcode, and all have one use.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
+ // Verify type of the LHS matches so we don't fold cmp's of different
+ // types or GEP's with different index types.
+ I->getOperand(0)->getType() != LHSType ||
+ I->getOperand(1)->getType() != RHSType)
+ return 0;
+
+ // If they are CmpInst instructions, check their predicates
+ if (Opc == Instruction::ICmp || Opc == Instruction::FCmp)
+ if (cast<CmpInst>(I)->getPredicate() !=
+ cast<CmpInst>(FirstInst)->getPredicate())
+ return 0;
+
+ // Keep track of which operand needs a phi node.
+ if (I->getOperand(0) != LHSVal) LHSVal = 0;
+ if (I->getOperand(1) != RHSVal) RHSVal = 0;
+ }
+
+ // If both LHS and RHS would need a PHI, don't do this transformation,
+ // because it would increase the number of PHIs entering the block,
+ // which leads to higher register pressure. This is especially
+ // bad when the PHIs are in the header of a loop.
+ if (!LHSVal && !RHSVal)
+ return 0;
+
+ // Otherwise, this is safe to transform!
+
+ Value *InLHS = FirstInst->getOperand(0);
+ Value *InRHS = FirstInst->getOperand(1);
+ PHINode *NewLHS = 0, *NewRHS = 0;
+ if (LHSVal == 0) {
+ NewLHS = PHINode::Create(LHSType,
+ FirstInst->getOperand(0)->getName() + ".pn");
+ NewLHS->reserveOperandSpace(PN.getNumOperands()/2);
+ NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
+ InsertNewInstBefore(NewLHS, PN);
+ LHSVal = NewLHS;
+ }
+
+ if (RHSVal == 0) {
+ NewRHS = PHINode::Create(RHSType,
+ FirstInst->getOperand(1)->getName() + ".pn");
+ NewRHS->reserveOperandSpace(PN.getNumOperands()/2);
+ NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
+ InsertNewInstBefore(NewRHS, PN);
+ RHSVal = NewRHS;
+ }
+
+ // Add all operands to the new PHIs.
+ if (NewLHS || NewRHS) {
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i));
+ if (NewLHS) {
+ Value *NewInLHS = InInst->getOperand(0);
+ NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i));
+ }
+ if (NewRHS) {
+ Value *NewInRHS = InInst->getOperand(1);
+ NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i));
+ }
+ }
+ }
+
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
+ return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
+ CmpInst *CIOp = cast<CmpInst>(FirstInst);
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ LHSVal, RHSVal);
+}
+
+Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
+ GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
+
+ SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
+ FirstInst->op_end());
+ // This is true if all GEP bases are allocas and if all indices into them are
+ // constants.
+ bool AllBasePointersAreAllocas = true;
+
+ // We don't want to replace this phi if the replacement would require
+ // more than one phi, which leads to higher register pressure. This is
+ // especially bad when the PHIs are in the header of a loop.
+ bool NeededPhi = false;
+
+ // Scan to see if all operands are the same opcode, and all have one use.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
+ GEP->getNumOperands() != FirstInst->getNumOperands())
+ return 0;
+
+ // Keep track of whether or not all GEPs are of alloca pointers.
+ if (AllBasePointersAreAllocas &&
+ (!isa<AllocaInst>(GEP->getOperand(0)) ||
+ !GEP->hasAllConstantIndices()))
+ AllBasePointersAreAllocas = false;
+
+ // Compare the operand lists.
+ for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
+ if (FirstInst->getOperand(op) == GEP->getOperand(op))
+ continue;
+
+ // Don't merge two GEPs when two operands differ (introducing phi nodes)
+ // if one of the PHIs has a constant for the index. The index may be
+ // substantially cheaper to compute for the constants, so making it a
+ // variable index could pessimize the path. This also handles the case
+ // for struct indices, which must always be constant.
+ if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
+ isa<ConstantInt>(GEP->getOperand(op)))
+ return 0;
+
+ if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
+ return 0;
+
+ // If we already needed a PHI for an earlier operand, and another operand
+ // also requires a PHI, we'd be introducing more PHIs than we're
+ // eliminating, which increases register pressure on entry to the PHI's
+ // block.
+ if (NeededPhi)
+ return 0;
+
+ FixedOperands[op] = 0; // Needs a PHI.
+ NeededPhi = true;
+ }
+ }
+
+ // If all of the base pointers of the PHI'd GEPs are from allocas, don't
+ // bother doing this transformation. At best, this will just save a bit of
+ // offset calculation, but all the predecessors will have to materialize the
+ // stack address into a register anyway. We'd actually rather *clone* the
+ // load up into the predecessors so that we have a load of a gep of an alloca,
+ // which can usually all be folded into the load.
+ if (AllBasePointersAreAllocas)
+ return 0;
+
+ // Otherwise, this is safe to transform. Insert PHI nodes for each operand
+ // that is variable.
+ SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
+
+ bool HasAnyPHIs = false;
+ for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
+ if (FixedOperands[i]) continue; // operand doesn't need a phi.
+ Value *FirstOp = FirstInst->getOperand(i);
+ PHINode *NewPN = PHINode::Create(FirstOp->getType(),
+ FirstOp->getName()+".pn");
+ InsertNewInstBefore(NewPN, PN);
+
+ NewPN->reserveOperandSpace(e);
+ NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
+ OperandPhis[i] = NewPN;
+ FixedOperands[i] = NewPN;
+ HasAnyPHIs = true;
+ }
+
+
+ // Add all operands to the new PHIs.
+ if (HasAnyPHIs) {
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ BasicBlock *InBB = PN.getIncomingBlock(i);
+
+ for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
+ if (PHINode *OpPhi = OperandPhis[op])
+ OpPhi->addIncoming(InGEP->getOperand(op), InBB);
+ }
+ }
+
+ Value *Base = FixedOperands[0];
+ return cast<GEPOperator>(FirstInst)->isInBounds() ?
+ GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1,
+ FixedOperands.end()) :
+ GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
+ FixedOperands.end());
+}
+
+
+/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to
+/// sink the load out of the block that defines it. This means that it must be
+/// obvious the value of the load is not changed from the point of the load to
+/// the end of the block it is in.
+///
+/// Finally, it is safe, but not profitable, to sink a load targetting a
+/// non-address-taken alloca. Doing so will cause us to not promote the alloca
+/// to a register.
+static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
+ BasicBlock::iterator BBI = L, E = L->getParent()->end();
+
+ for (++BBI; BBI != E; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ // Check for non-address taken alloca. If not address-taken already, it isn't
+ // profitable to do this xform.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
+ bool isAddressTaken = false;
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (isa<LoadInst>(U)) continue;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // If storing TO the alloca, then the address isn't taken.
+ if (SI->getOperand(1) == AI) continue;
+ }
+ isAddressTaken = true;
+ break;
+ }
+
+ if (!isAddressTaken && AI->isStaticAlloca())
+ return false;
+ }
+
+ // If this load is a load from a GEP with a constant offset from an alloca,
+ // then we don't want to sink it. In its present form, it will be
+ // load [constant stack offset]. Sinking it will cause us to have to
+ // materialize the stack addresses in each predecessor in a register only to
+ // do a shared load from register in the successor.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0)))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
+ if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
+ return false;
+
+ return true;
+}
+
+Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
+ LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));
+
+ // When processing loads, we need to propagate two bits of information to the
+ // sunk load: whether it is volatile, and what its alignment is. We currently
+ // don't sink loads when some have their alignment specified and some don't.
+ // visitLoadInst will propagate an alignment onto the load when TD is around,
+ // and if TD isn't around, we can't handle the mixed case.
+ bool isVolatile = FirstLI->isVolatile();
+ unsigned LoadAlignment = FirstLI->getAlignment();
+ unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
+
+ // We can't sink the load if the loaded value could be modified between the
+ // load and the PHI.
+ if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
+ !isSafeAndProfitableToSinkLoad(FirstLI))
+ return 0;
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+ // Check to see if all arguments are the same operation.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
+ if (!LI || !LI->hasOneUse())
+ return 0;
+
+ // We can't sink the load if the loaded value could be modified between
+ // the load and the PHI.
+ if (LI->isVolatile() != isVolatile ||
+ LI->getParent() != PN.getIncomingBlock(i) ||
+ LI->getPointerAddressSpace() != LoadAddrSpace ||
+ !isSafeAndProfitableToSinkLoad(LI))
+ return 0;
+
+ // If some of the loads have an alignment specified but not all of them,
+ // we can't do the transformation.
+ if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
+ return 0;
+
+ LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+ }
+
+ // Okay, they are all the same operation. Create a new PHI node of the
+ // correct type, and PHI together all of the LHS's of the instructions.
+ PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
+ PN.getName()+".in");
+ NewPN->reserveOperandSpace(PN.getNumOperands()/2);
+
+ Value *InVal = FirstLI->getOperand(0);
+ NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+ // Add all operands to the new PHI.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
+ if (NewInVal != InVal)
+ InVal = 0;
+ NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ }
+
+ Value *PhiVal;
+ if (InVal) {
+ // The new PHI unions all of the same values together. This is really
+ // common, so we handle it intelligently here for compile-time speed.
+ PhiVal = InVal;
+ delete NewPN;
+ } else {
+ InsertNewInstBefore(NewPN, PN);
+ PhiVal = NewPN;
+ }
+
+ // If this was a volatile load that we are merging, make sure to loop through
+ // and mark all the input loads as non-volatile. If we don't do this, we will
+ // insert a new volatile load and the old ones will not be deletable.
+ if (isVolatile)
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
+
+ return new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
+}
+
+
+
+/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+/// operator and they all are only used by the PHI, PHI together their
+/// inputs, and do the operation once, to the result of the PHI.
+Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
+ Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+
+ if (isa<GetElementPtrInst>(FirstInst))
+ return FoldPHIArgGEPIntoPHI(PN);
+ if (isa<LoadInst>(FirstInst))
+ return FoldPHIArgLoadIntoPHI(PN);
+
+ // Scan the instruction, looking for input operations that can be folded away.
+ // If all input operands to the phi are the same instruction (e.g. a cast from
+ // the same type or "+42") we can pull the operation through the PHI, reducing
+ // code size and simplifying code.
+ Constant *ConstantOp = 0;
+ const Type *CastSrcTy = 0;
+
+ if (isa<CastInst>(FirstInst)) {
+ CastSrcTy = FirstInst->getOperand(0)->getType();
+
+ // Be careful about transforming integer PHIs. We don't want to pessimize
+ // the code by turning an i32 into an i1293.
+ if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) {
+ if (!ShouldChangeType(PN.getType(), CastSrcTy))
+ return 0;
+ }
+ } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
+ // Can fold binop, compare or shift here if the RHS is a constant,
+ // otherwise call FoldPHIArgBinOpIntoPHI.
+ ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
+ if (ConstantOp == 0)
+ return FoldPHIArgBinOpIntoPHI(PN);
+ } else {
+ return 0; // Cannot fold this operation.
+ }
+
+ // Check to see if all arguments are the same operation.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
+ return 0;
+ if (CastSrcTy) {
+ if (I->getOperand(0)->getType() != CastSrcTy)
+ return 0; // Cast operation must match.
+ } else if (I->getOperand(1) != ConstantOp) {
+ return 0;
+ }
+ }
+
+ // Okay, they are all the same operation. Create a new PHI node of the
+ // correct type, and PHI together all of the LHS's of the instructions.
+ PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
+ PN.getName()+".in");
+ NewPN->reserveOperandSpace(PN.getNumOperands()/2);
+
+ Value *InVal = FirstInst->getOperand(0);
+ NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+ // Add all operands to the new PHI.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
+ if (NewInVal != InVal)
+ InVal = 0;
+ NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ }
+
+ Value *PhiVal;
+ if (InVal) {
+ // The new PHI unions all of the same values together. This is really
+ // common, so we handle it intelligently here for compile-time speed.
+ PhiVal = InVal;
+ delete NewPN;
+ } else {
+ InsertNewInstBefore(NewPN, PN);
+ PhiVal = NewPN;
+ }
+
+ // Insert and return the new operation.
+ if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst))
+ return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
+
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
+ return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+
+ CmpInst *CIOp = cast<CmpInst>(FirstInst);
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ PhiVal, ConstantOp);
+}
+
+/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
+/// that is dead.
+static bool DeadPHICycle(PHINode *PN,
+ SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) {
+ if (PN->use_empty()) return true;
+ if (!PN->hasOneUse()) return false;
+
+ // Remember this node, and if we find the cycle, return.
+ if (!PotentiallyDeadPHIs.insert(PN))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PotentiallyDeadPHIs.size() == 16)
+ return false;
+
+ if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
+ return DeadPHICycle(PU, PotentiallyDeadPHIs);
+
+ return false;
+}
+
+/// PHIsEqualValue - Return true if this phi node is always equal to
+/// NonPhiInVal. This happens with mutually cyclic phi nodes like:
+/// z = some value; x = phi (y, z); y = phi (x, z)
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
+ SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
+ // See if we already saw this PHI node.
+ if (!ValueEqualPHIs.insert(PN))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (ValueEqualPHIs.size() == 16)
+ return false;
+
+ // Scan the operands to see if they are either phi nodes or are equal to
+ // the value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Op = PN->getIncomingValue(i);
+ if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
+ if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
+ return false;
+ } else if (Op != NonPhiInVal)
+ return false;
+ }
+
+ return true;
+}
+
+
+namespace {
+struct PHIUsageRecord {
+ unsigned PHIId; // The ID # of the PHI (something determinstic to sort on)
+ unsigned Shift; // The amount shifted.
+ Instruction *Inst; // The trunc instruction.
+
+ PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
+ : PHIId(pn), Shift(Sh), Inst(User) {}
+
+ bool operator<(const PHIUsageRecord &RHS) const {
+ if (PHIId < RHS.PHIId) return true;
+ if (PHIId > RHS.PHIId) return false;
+ if (Shift < RHS.Shift) return true;
+ if (Shift > RHS.Shift) return false;
+ return Inst->getType()->getPrimitiveSizeInBits() <
+ RHS.Inst->getType()->getPrimitiveSizeInBits();
+ }
+};
+
+struct LoweredPHIRecord {
+ PHINode *PN; // The PHI that was lowered.
+ unsigned Shift; // The amount shifted.
+ unsigned Width; // The width extracted.
+
+ LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
+ : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
+
+ // Ctor form used by DenseMap.
+ LoweredPHIRecord(PHINode *pn, unsigned Sh)
+ : PN(pn), Shift(Sh), Width(0) {}
+};
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LoweredPHIRecord> {
+ static inline LoweredPHIRecord getEmptyKey() {
+ return LoweredPHIRecord(0, 0);
+ }
+ static inline LoweredPHIRecord getTombstoneKey() {
+ return LoweredPHIRecord(0, 1);
+ }
+ static unsigned getHashValue(const LoweredPHIRecord &Val) {
+ return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
+ (Val.Width>>3);
+ }
+ static bool isEqual(const LoweredPHIRecord &LHS,
+ const LoweredPHIRecord &RHS) {
+ return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
+ LHS.Width == RHS.Width;
+ }
+ };
+ template <>
+ struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
+}
+
+
+/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
+/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If
+/// so, we split the PHI into the various pieces being extracted. This sort of
+/// thing is introduced when SROA promotes an aggregate to large integer values.
+///
+/// TODO: The user of the trunc may be an bitcast to float/double/vector or an
+/// inttoptr. We should produce new PHIs in the right type.
+///
+Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
+ // PHIUsers - Keep track of all of the truncated values extracted from a set
+ // of PHIs, along with their offset. These are the things we want to rewrite.
+ SmallVector<PHIUsageRecord, 16> PHIUsers;
+
+ // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
+ // nodes which are extracted from. PHIsToSlice is a set we use to avoid
+ // revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to
+ // check the uses of (to ensure they are all extracts).
+ SmallVector<PHINode*, 8> PHIsToSlice;
+ SmallPtrSet<PHINode*, 8> PHIsInspected;
+
+ PHIsToSlice.push_back(&FirstPhi);
+ PHIsInspected.insert(&FirstPhi);
+
+ for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
+ PHINode *PN = PHIsToSlice[PHIId];
+
+ // Scan the input list of the PHI. If any input is an invoke, and if the
+ // input is defined in the predecessor, then we won't be split the critical
+ // edge which is required to insert a truncate. Because of this, we have to
+ // bail out.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
+ if (II == 0) continue;
+ if (II->getParent() != PN->getIncomingBlock(i))
+ continue;
+
+ // If we have a phi, and if it's directly in the predecessor, then we have
+ // a critical edge where we need to put the truncate. Since we can't
+ // split the edge in instcombine, we have to bail out.
+ return 0;
+ }
+
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // If the user is a PHI, inspect its uses recursively.
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (PHIsInspected.insert(UserPN))
+ PHIsToSlice.push_back(UserPN);
+ continue;
+ }
+
+ // Truncates are always ok.
+ if (isa<TruncInst>(User)) {
+ PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
+ continue;
+ }
+
+ // Otherwise it must be a lshr which can only be used by one trunc.
+ if (User->getOpcode() != Instruction::LShr ||
+ !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
+ !isa<ConstantInt>(User->getOperand(1)))
+ return 0;
+
+ unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
+ PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
+ }
+ }
+
+ // If we have no users, they must be all self uses, just nuke the PHI.
+ if (PHIUsers.empty())
+ return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
+
+ // If this phi node is transformable, create new PHIs for all the pieces
+ // extracted out of it. First, sort the users by their offset and size.
+ array_pod_sort(PHIUsers.begin(), PHIUsers.end());
+
+ DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
+ );
+
+ // PredValues - This is a temporary used when rewriting PHI nodes. It is
+ // hoisted out here to avoid construction/destruction thrashing.
+ DenseMap<BasicBlock*, Value*> PredValues;
+
+ // ExtractedVals - Each new PHI we introduce is saved here so we don't
+ // introduce redundant PHIs.
+ DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
+
+ for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
+ unsigned PHIId = PHIUsers[UserI].PHIId;
+ PHINode *PN = PHIsToSlice[PHIId];
+ unsigned Offset = PHIUsers[UserI].Shift;
+ const Type *Ty = PHIUsers[UserI].Inst->getType();
+
+ PHINode *EltPHI;
+
+ // If we've already lowered a user like this, reuse the previously lowered
+ // value.
+ if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
+
+ // Otherwise, Create the new PHI node for this user.
+ EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN);
+ assert(EltPHI->getType() != PN->getType() &&
+ "Truncate didn't shrink phi?");
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *&PredVal = PredValues[Pred];
+
+ // If we already have a value for this predecessor, reuse it.
+ if (PredVal) {
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ // Handle the PHI self-reuse case.
+ Value *InVal = PN->getIncomingValue(i);
+ if (InVal == PN) {
+ PredVal = EltPHI;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ if (PHINode *InPHI = dyn_cast<PHINode>(PN)) {
+ // If the incoming value was a PHI, and if it was one of the PHIs we
+ // already rewrote it, just use the lowered value.
+ if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
+ PredVal = Res;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+ }
+
+ // Otherwise, do an extract in the predecessor.
+ Builder->SetInsertPoint(Pred, Pred->getTerminator());
+ Value *Res = InVal;
+ if (Offset)
+ Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
+ Offset), "extract");
+ Res = Builder->CreateTrunc(Res, Ty, "extract.t");
+ PredVal = Res;
+ EltPHI->addIncoming(Res, Pred);
+
+ // If the incoming value was a PHI, and if it was one of the PHIs we are
+ // rewriting, we will ultimately delete the code we inserted. This
+ // means we need to revisit that PHI to make sure we extract out the
+ // needed piece.
+ if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
+ if (PHIsInspected.count(OldInVal)) {
+ unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
+ OldInVal)-PHIsToSlice.begin();
+ PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+ cast<Instruction>(Res)));
+ ++UserE;
+ }
+ }
+ PredValues.clear();
+
+ DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
+ << *EltPHI << '\n');
+ ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
+ }
+
+ // Replace the use of this piece with the PHI node.
+ ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
+ }
+
+ // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
+ // with undefs.
+ Value *Undef = UndefValue::get(FirstPhi.getType());
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ ReplaceInstUsesWith(*PHIsToSlice[i], Undef);
+ return ReplaceInstUsesWith(FirstPhi, Undef);
+}
+
+// PHINode simplification
+//
+Instruction *InstCombiner::visitPHINode(PHINode &PN) {
+ // If LCSSA is around, don't mess with Phi nodes
+ if (MustPreserveLCSSA) return 0;
+
+ if (Value *V = PN.hasConstantValue())
+ return ReplaceInstUsesWith(PN, V);
+
+ // If all PHI operands are the same operation, pull them through the PHI,
+ // reducing code size.
+ if (isa<Instruction>(PN.getIncomingValue(0)) &&
+ isa<Instruction>(PN.getIncomingValue(1)) &&
+ cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
+ cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
+ // FIXME: The hasOneUse check will fail for PHIs that use the value more
+ // than themselves more than once.
+ PN.getIncomingValue(0)->hasOneUse())
+ if (Instruction *Result = FoldPHIArgOpIntoPHI(PN))
+ return Result;
+
+ // If this is a trivial cycle in the PHI node graph, remove it. Basically, if
+ // this PHI only has a single use (a PHI), and if that PHI only has one use (a
+ // PHI)... break the cycle.
+ if (PN.hasOneUse()) {
+ Instruction *PHIUser = cast<Instruction>(PN.use_back());
+ if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
+ SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
+ PotentiallyDeadPHIs.insert(&PN);
+ if (DeadPHICycle(PU, PotentiallyDeadPHIs))
+ return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+ }
+
+ // If this phi has a single use, and if that use just computes a value for
+ // the next iteration of a loop, delete the phi. This occurs with unused
+ // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this
+ // common case here is good because the only other things that catch this
+ // are induction variable analysis (sometimes) and ADCE, which is only run
+ // late.
+ if (PHIUser->hasOneUse() &&
+ (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
+ PHIUser->use_back() == &PN) {
+ return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+ }
+ }
+
+ // We sometimes end up with phi cycles that non-obviously end up being the
+ // same value, for example:
+ // z = some value; x = phi (y, z); y = phi (x, z)
+ // where the phi nodes don't necessarily need to be in the same block. Do a
+ // quick check to see if the PHI node only contains a single non-phi value, if
+ // so, scan to see if the phi cycle is actually equal to that value.
+ {
+ unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues();
+ // Scan for the first non-phi operand.
+ while (InValNo != NumOperandVals &&
+ isa<PHINode>(PN.getIncomingValue(InValNo)))
+ ++InValNo;
+
+ if (InValNo != NumOperandVals) {
+ Value *NonPhiInVal = PN.getOperand(InValNo);
+
+ // Scan the rest of the operands to see if there are any conflicts, if so
+ // there is no need to recursively scan other phis.
+ for (++InValNo; InValNo != NumOperandVals; ++InValNo) {
+ Value *OpVal = PN.getIncomingValue(InValNo);
+ if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
+ break;
+ }
+
+ // If we scanned over all operands, then we have one unique value plus
+ // phi values. Scan PHI nodes to see if they all merge in each other or
+ // the value.
+ if (InValNo == NumOperandVals) {
+ SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
+ if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
+ return ReplaceInstUsesWith(PN, NonPhiInVal);
+ }
+ }
+ }
+
+ // If there are multiple PHIs, sort their operands so that they all list
+ // the blocks in the same order. This will help identical PHIs be eliminated
+ // by other passes. Other passes shouldn't depend on this for correctness
+ // however.
+ PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
+ if (&PN != FirstPN)
+ for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *BBA = PN.getIncomingBlock(i);
+ BasicBlock *BBB = FirstPN->getIncomingBlock(i);
+ if (BBA != BBB) {
+ Value *VA = PN.getIncomingValue(i);
+ unsigned j = PN.getBasicBlockIndex(BBB);
+ Value *VB = PN.getIncomingValue(j);
+ PN.setIncomingBlock(i, BBB);
+ PN.setIncomingValue(i, VB);
+ PN.setIncomingBlock(j, BBA);
+ PN.setIncomingValue(j, VA);
+ // NOTE: Instcombine normally would want us to "return &PN" if we
+ // modified any of the operands of an instruction. However, since we
+ // aren't adding or removing uses (just rearranging them) we don't do
+ // this in this case.
+ }
+ }
+
+ // If this is an integer PHI and we know that it has an illegal type, see if
+ // it is only used by trunc or trunc(lshr) operations. If so, we split the
+ // PHI into the various pieces being extracted. This sort of thing is
+ // introduced when SROA promotes an aggregate to a single large integer type.
+ if (PN.getType()->isIntegerTy() && TD &&
+ !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
+ return Res;
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
new file mode 100644
index 0000000..c44fe9d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -0,0 +1,704 @@
+//===- InstCombineSelect.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitSelect function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms,
+/// returning the kind and providing the out parameter results if we
+/// successfully match.
+static SelectPatternFlavor
+MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
+ SelectInst *SI = dyn_cast<SelectInst>(V);
+ if (SI == 0) return SPF_UNKNOWN;
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
+ if (ICI == 0) return SPF_UNKNOWN;
+
+ LHS = ICI->getOperand(0);
+ RHS = ICI->getOperand(1);
+
+ // (icmp X, Y) ? X : Y
+ if (SI->getTrueValue() == ICI->getOperand(0) &&
+ SI->getFalseValue() == ICI->getOperand(1)) {
+ switch (ICI->getPredicate()) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMAX;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMAX;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMIN;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMIN;
+ }
+ }
+
+ // (icmp X, Y) ? Y : X
+ if (SI->getTrueValue() == ICI->getOperand(1) &&
+ SI->getFalseValue() == ICI->getOperand(0)) {
+ switch (ICI->getPredicate()) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMIN;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMIN;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMAX;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMAX;
+ }
+ }
+
+ // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
+
+ return SPF_UNKNOWN;
+}
+
+
+/// GetSelectFoldableOperands - We want to turn code that looks like this:
+/// %C = or %A, %B
+/// %D = select %cond, %C, %A
+/// into:
+/// %C = select %cond, %B, 0
+/// %D = or %A, %C
+///
+/// Assuming that the specified instruction is an operand to the select, return
+/// a bitmask indicating which operands of this instruction are foldable if they
+/// equal the other incoming value of the select.
+///
+static unsigned GetSelectFoldableOperands(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return 3; // Can fold through either operand.
+ case Instruction::Sub: // Can only fold on the amount subtracted.
+ case Instruction::Shl: // Can only fold on the shift amount.
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return 1;
+ default:
+ return 0; // Cannot fold
+ }
+}
+
+/// GetSelectFoldableConstant - For the same transformation as the previous
+/// function, return the identity constant that goes into the select.
+static Constant *GetSelectFoldableConstant(Instruction *I) {
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("This cannot happen!");
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return Constant::getNullValue(I->getType());
+ case Instruction::And:
+ return Constant::getAllOnesValue(I->getType());
+ case Instruction::Mul:
+ return ConstantInt::get(I->getType(), 1);
+ }
+}
+
+/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI
+/// have the same opcode and only one use each. Try to simplify this.
+Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+ Instruction *FI) {
+ if (TI->getNumOperands() == 1) {
+ // If this is a non-volatile load or a cast from the same type,
+ // merge.
+ if (TI->isCast()) {
+ if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
+ return 0;
+ } else {
+ return 0; // unknown unary op.
+ }
+
+ // Fold this by inserting a select from the input values.
+ SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
+ FI->getOperand(0), SI.getName()+".v");
+ InsertNewInstBefore(NewSI, SI);
+ return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
+ TI->getType());
+ }
+
+ // Only handle binary operators here.
+ if (!isa<BinaryOperator>(TI))
+ return 0;
+
+ // Figure out if the operations have any operands in common.
+ Value *MatchOp, *OtherOpT, *OtherOpF;
+ bool MatchIsOpZero;
+ if (TI->getOperand(0) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = false;
+ } else if (!TI->isCommutative()) {
+ return 0;
+ } else if (TI->getOperand(0) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else {
+ return 0;
+ }
+
+ // If we reach here, they do have operations in common.
+ SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT,
+ OtherOpF, SI.getName()+".v");
+ InsertNewInstBefore(NewSI, SI);
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
+ if (MatchIsOpZero)
+ return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
+ else
+ return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
+ }
+ llvm_unreachable("Shouldn't get here");
+ return 0;
+}
+
+static bool isSelect01(Constant *C1, Constant *C2) {
+ ConstantInt *C1I = dyn_cast<ConstantInt>(C1);
+ if (!C1I)
+ return false;
+ ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
+ if (!C2I)
+ return false;
+ return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne());
+}
+
+/// FoldSelectIntoOp - Try fold the select into one of the operands to
+/// facilitate further optimization.
+Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
+ Value *FalseVal) {
+ // See the comment above GetSelectFoldableOperands for a description of the
+ // transformation we are doing here.
+ if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
+ if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
+ !isa<Constant>(FalseVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(TVI);
+ Value *OOp = TVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0 and 1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+ Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C);
+ InsertNewInstBefore(NewSel, SI);
+ NewSel->takeName(TVI);
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
+ return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
+ llvm_unreachable("Unknown instruction!!");
+ }
+ }
+ }
+ }
+ }
+
+ if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) {
+ if (FVI->hasOneUse() && FVI->getNumOperands() == 2 &&
+ !isa<Constant>(TrueVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(FVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(FVI);
+ Value *OOp = FVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0 and 1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+ Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp);
+ InsertNewInstBefore(NewSel, SI);
+ NewSel->takeName(FVI);
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
+ return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
+ llvm_unreachable("Unknown instruction!!");
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// visitSelectInstWithICmp - Visit a SelectInst that has an
+/// ICmpInst as its first operand.
+///
+Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
+ ICmpInst *ICI) {
+ bool Changed = false;
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ // Check cases where the comparison is with a constant that
+ // can be adjusted to fit the min/max idiom. We may edit ICI in
+ // place here, so make sure the select is the only user.
+ if (ICI->hasOneUse())
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: {
+ // X < MIN ? T : F --> F
+ if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // X < C ? X : C-1 --> X > C-1 ? C-1 : X
+ Constant *AdjustedRHS =
+ ConstantInt::get(CI->getContext(), CI->getValue()-1);
+ if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
+ (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ CmpRHS = AdjustedRHS;
+ std::swap(FalseVal, TrueVal);
+ ICI->setPredicate(Pred);
+ ICI->setOperand(1, CmpRHS);
+ SI.setOperand(1, TrueVal);
+ SI.setOperand(2, FalseVal);
+ Changed = true;
+ }
+ break;
+ }
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: {
+ // X > MAX ? T : F --> F
+ if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // X > C ? X : C+1 --> X < C+1 ? C+1 : X
+ Constant *AdjustedRHS =
+ ConstantInt::get(CI->getContext(), CI->getValue()+1);
+ if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
+ (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ CmpRHS = AdjustedRHS;
+ std::swap(FalseVal, TrueVal);
+ ICI->setPredicate(Pred);
+ ICI->setOperand(1, CmpRHS);
+ SI.setOperand(1, TrueVal);
+ SI.setOperand(2, FalseVal);
+ Changed = true;
+ }
+ break;
+ }
+ }
+ }
+
+ // Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1
+ // and (X <s 0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1
+ // FIXME: Type and constness constraints could be lifted, but we have to
+ // watch code size carefully. We should consider xor instead of
+ // sub/add when we decide to do that.
+ if (const IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) {
+ if (TrueVal->getType() == Ty) {
+ if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
+ ConstantInt *C1 = NULL, *C2 = NULL;
+ if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) {
+ C1 = dyn_cast<ConstantInt>(TrueVal);
+ C2 = dyn_cast<ConstantInt>(FalseVal);
+ } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) {
+ C1 = dyn_cast<ConstantInt>(FalseVal);
+ C2 = dyn_cast<ConstantInt>(TrueVal);
+ }
+ if (C1 && C2) {
+ // This shift results in either -1 or 0.
+ Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1);
+
+ // Check if we can express the operation with a single or.
+ if (C2->isAllOnesValue())
+ return ReplaceInstUsesWith(SI, Builder->CreateOr(AShr, C1));
+
+ Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue());
+ return ReplaceInstUsesWith(SI, Builder->CreateAdd(And, C1));
+ }
+ }
+ }
+ }
+
+ if (CmpLHS == TrueVal && CmpRHS == FalseVal) {
+ // Transform (X == Y) ? X : Y -> Y
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // Transform (X != Y) ? X : Y -> X
+ if (Pred == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
+
+ } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) {
+ // Transform (X == Y) ? Y : X -> X
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // Transform (X != Y) ? Y : X -> Y
+ if (Pred == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
+ }
+ return Changed ? &SI : 0;
+}
+
+
+/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a
+/// PHI node (but the two may be in different blocks). See if the true/false
+/// values (V) are live in all of the predecessor blocks of the PHI. For
+/// example, cases like this cannot be mapped:
+///
+/// X = phi [ C1, BB1], [C2, BB2]
+/// Y = add
+/// Z = select X, Y, 0
+///
+/// because Y is not live in BB1/BB2.
+///
+static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
+ const SelectInst &SI) {
+ // If the value is a non-instruction value like a constant or argument, it
+ // can always be mapped.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return true;
+
+ // If V is a PHI node defined in the same block as the condition PHI, we can
+ // map the arguments.
+ const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
+
+ if (const PHINode *VP = dyn_cast<PHINode>(I))
+ if (VP->getParent() == CondPHI->getParent())
+ return true;
+
+ // Otherwise, if the PHI and select are defined in the same block and if V is
+ // defined in a different block, then we can transform it.
+ if (SI.getParent() == CondPHI->getParent() &&
+ I->getParent() != CondPHI->getParent())
+ return true;
+
+ // Otherwise we have a 'hard' case and we can't tell without doing more
+ // detailed dominator based analysis, punt.
+ return false;
+}
+
+/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form:
+/// SPF2(SPF1(A, B), C)
+Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
+ SelectPatternFlavor SPF1,
+ Value *A, Value *B,
+ Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C) {
+ if (C == A || C == B) {
+ // MAX(MAX(A, B), B) -> MAX(A, B)
+ // MIN(MIN(a, b), a) -> MIN(a, b)
+ if (SPF1 == SPF2)
+ return ReplaceInstUsesWith(Outer, Inner);
+
+ // MAX(MIN(a, b), a) -> a
+ // MIN(MAX(a, b), a) -> a
+ if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
+ (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) ||
+ (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) ||
+ (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN))
+ return ReplaceInstUsesWith(Outer, C);
+ }
+
+ // TODO: MIN(MIN(A, 23), 97)
+ return 0;
+}
+
+
+
+
+Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
+ Value *CondVal = SI.getCondition();
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, TD))
+ return ReplaceInstUsesWith(SI, V);
+
+ if (SI.getType()->isIntegerTy(1)) {
+ if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
+ if (C->getZExtValue()) {
+ // Change: A = select B, true, C --> A = or B, C
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ }
+ // Change: A = select B, false, C --> A = and !B, C
+ Value *NotCond =
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
+ "not."+CondVal->getName()), SI);
+ return BinaryOperator::CreateAnd(NotCond, FalseVal);
+ } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
+ if (C->getZExtValue() == false) {
+ // Change: A = select B, C, false --> A = and B, C
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ }
+ // Change: A = select B, C, true --> A = or !B, C
+ Value *NotCond =
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
+ "not."+CondVal->getName()), SI);
+ return BinaryOperator::CreateOr(NotCond, TrueVal);
+ }
+
+ // select a, b, a -> a&b
+ // select a, a, b -> a|b
+ if (CondVal == TrueVal)
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ else if (CondVal == FalseVal)
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ }
+
+ // Selecting between two integer constants?
+ if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
+ if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
+ // select C, 1, 0 -> zext C to int
+ if (FalseValC->isZero() && TrueValC->getValue() == 1)
+ return new ZExtInst(CondVal, SI.getType());
+
+ // select C, -1, 0 -> sext C to int
+ if (FalseValC->isZero() && TrueValC->isAllOnesValue())
+ return new SExtInst(CondVal, SI.getType());
+
+ // select C, 0, 1 -> zext !C to int
+ if (TrueValC->isZero() && FalseValC->getValue() == 1) {
+ Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
+ return new ZExtInst(NotCond, SI.getType());
+ }
+
+ // select C, 0, -1 -> sext !C to int
+ if (TrueValC->isZero() && FalseValC->isAllOnesValue()) {
+ Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
+ return new SExtInst(NotCond, SI.getType());
+ }
+
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) {
+ // If one of the constants is zero (we know they can't both be) and we
+ // have an icmp instruction with zero, and we have an 'and' with the
+ // non-constant value, eliminate this whole mess. This corresponds to
+ // cases like this: ((X & 27) ? 27 : 0)
+ if (TrueValC->isZero() || FalseValC->isZero())
+ if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) &&
+ cast<Constant>(IC->getOperand(1))->isNullValue())
+ if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0)))
+ if (ICA->getOpcode() == Instruction::And &&
+ isa<ConstantInt>(ICA->getOperand(1)) &&
+ (ICA->getOperand(1) == TrueValC ||
+ ICA->getOperand(1) == FalseValC) &&
+ cast<ConstantInt>(ICA->getOperand(1))->getValue().isPowerOf2()) {
+ // Okay, now we know that everything is set up, we just don't
+ // know whether we have a icmp_ne or icmp_eq and whether the
+ // true or false val is the zero.
+ bool ShouldNotVal = !TrueValC->isZero();
+ ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
+ Value *V = ICA;
+ if (ShouldNotVal)
+ V = Builder->CreateXor(V, ICA->getOperand(1));
+ return ReplaceInstUsesWith(SI, V);
+ }
+ }
+ }
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
+ if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
+ // Transform (X == Y) ? X : Y -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X une Y) ? X : Y -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, TrueVal);
+ }
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+
+ } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
+ // Transform (X == Y) ? Y : X -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X une Y) ? Y : X -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, TrueVal);
+ }
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+ }
+ // NOTE: if we wanted to, this is where to detect ABS
+ }
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal))
+ if (Instruction *Result = visitSelectInstWithICmp(SI, ICI))
+ return Result;
+
+ if (Instruction *TI = dyn_cast<Instruction>(TrueVal))
+ if (Instruction *FI = dyn_cast<Instruction>(FalseVal))
+ if (TI->hasOneUse() && FI->hasOneUse()) {
+ Instruction *AddOp = 0, *SubOp = 0;
+
+ // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
+ if (TI->getOpcode() == FI->getOpcode())
+ if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
+ return IV;
+
+ // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is
+ // even legal for FP.
+ if ((TI->getOpcode() == Instruction::Sub &&
+ FI->getOpcode() == Instruction::Add) ||
+ (TI->getOpcode() == Instruction::FSub &&
+ FI->getOpcode() == Instruction::FAdd)) {
+ AddOp = FI; SubOp = TI;
+ } else if ((FI->getOpcode() == Instruction::Sub &&
+ TI->getOpcode() == Instruction::Add) ||
+ (FI->getOpcode() == Instruction::FSub &&
+ TI->getOpcode() == Instruction::FAdd)) {
+ AddOp = TI; SubOp = FI;
+ }
+
+ if (AddOp) {
+ Value *OtherAddOp = 0;
+ if (SubOp->getOperand(0) == AddOp->getOperand(0)) {
+ OtherAddOp = AddOp->getOperand(1);
+ } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) {
+ OtherAddOp = AddOp->getOperand(0);
+ }
+
+ if (OtherAddOp) {
+ // So at this point we know we have (Y -> OtherAddOp):
+ // select C, (add X, Y), (sub X, Z)
+ Value *NegVal; // Compute -Z
+ if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) {
+ NegVal = ConstantExpr::getNeg(C);
+ } else {
+ NegVal = InsertNewInstBefore(
+ BinaryOperator::CreateNeg(SubOp->getOperand(1),
+ "tmp"), SI);
+ }
+
+ Value *NewTrueOp = OtherAddOp;
+ Value *NewFalseOp = NegVal;
+ if (AddOp != TI)
+ std::swap(NewTrueOp, NewFalseOp);
+ Instruction *NewSel =
+ SelectInst::Create(CondVal, NewTrueOp,
+ NewFalseOp, SI.getName() + ".p");
+
+ NewSel = InsertNewInstBefore(NewSel, SI);
+ return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
+ }
+ }
+ }
+
+ // See if we can fold the select into one of our operands.
+ if (SI.getType()->isIntegerTy()) {
+ if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
+ return FoldI;
+
+ // MAX(MAX(a, b), a) -> MAX(a, b)
+ // MIN(MIN(a, b), a) -> MIN(a, b)
+ // MAX(MIN(a, b), a) -> a
+ // MIN(MAX(a, b), a) -> a
+ Value *LHS, *RHS, *LHS2, *RHS2;
+ if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
+ if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
+ SI, SPF, RHS))
+ return R;
+ if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
+ SI, SPF, LHS))
+ return R;
+ }
+
+ // TODO.
+ // ABS(-X) -> ABS(X)
+ // ABS(ABS(X)) -> ABS(X)
+ }
+
+ // See if we can fold the select into a phi node if the condition is a select.
+ if (isa<PHINode>(SI.getCondition()))
+ // The true/false values have to be live in the PHI predecessor's blocks.
+ if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
+ CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
+ if (Instruction *NV = FoldOpIntoPhi(SI))
+ return NV;
+
+ if (BinaryOperator::isNot(CondVal)) {
+ SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
+ SI.setOperand(1, FalseVal);
+ SI.setOperand(2, TrueVal);
+ return &SI;
+ }
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
new file mode 100644
index 0000000..27716b8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -0,0 +1,702 @@
+//===- InstCombineShifts.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitShl, visitLShr, and visitAShr functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
+ assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // shl X, 0 == X and shr X, 0 == X
+ // shl 0, X == 0 and shr 0, X == 0
+ if (Op1 == Constant::getNullValue(Op1->getType()) ||
+ Op0 == Constant::getNullValue(Op0->getType()))
+ return ReplaceInstUsesWith(I, Op0);
+
+ if (isa<UndefValue>(Op0)) {
+ if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
+ return ReplaceInstUsesWith(I, Op0);
+ else // undef << X -> 0, undef >>u X -> 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
+ if (isa<UndefValue>(Op1)) {
+ if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X
+ return ReplaceInstUsesWith(I, Op0);
+ else // X << undef, X >>u undef -> 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
+
+ // See if we can fold away this shift.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // Try to fold constant and into select arguments.
+ if (isa<Constant>(Op0))
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
+ if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
+ return Res;
+ return 0;
+}
+
+/// CanEvaluateShifted - See if we can compute the specified value, but shifted
+/// logically to the left or right by some number of bits. This should return
+/// true if the expression can be computed for the same cost as the current
+/// expression tree. This is used to eliminate extraneous shifting from things
+/// like:
+/// %C = shl i128 %A, 64
+/// %D = shl i128 %B, 96
+/// %E = or i128 %C, %D
+/// %F = lshr i128 %E, 64
+/// where the client will ask if E can be computed shifted right by 64-bits. If
+/// this succeeds, the GetShiftedValue function will be called to produce the
+/// value.
+static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC) {
+ // We can always evaluate constants shifted.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If this is the opposite shift, we can directly reuse the input of the shift
+ // if the needed bits are already zero in the input. This allows us to reuse
+ // the value which means that we don't care if the shift has multiple uses.
+ // TODO: Handle opposite shift by exact value.
+ ConstantInt *CI;
+ if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
+ (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
+ if (CI->getZExtValue() == NumBits) {
+ // TODO: Check that the input bits are already zero with MaskedValueIsZero
+#if 0
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+#endif
+
+ }
+ }
+
+ // We can't mutate something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ switch (I->getOpcode()) {
+ default: return false;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted.
+ return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
+ CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
+
+ case Instruction::Shl: {
+ // We can often fold the shift into shifts-by-a-constant.
+ CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) return true;
+
+ // We can always turn shl(c)+shr(c) -> and(c2).
+ if (CI->getValue() == NumBits) return true;
+
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
+ // profitable unless we know the and'd out bits are already zero.
+ if (CI->getZExtValue() > NumBits) {
+ unsigned HighBits = CI->getZExtValue() - NumBits;
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(TypeWidth, HighBits)))
+ return true;
+ }
+
+ return false;
+ }
+ case Instruction::LShr: {
+ // We can often fold the shift into shifts-by-a-constant.
+ CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) return true;
+
+ // We can always turn lshr(c)+shl(c) -> and(c2).
+ if (CI->getValue() == NumBits) return true;
+
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
+ // profitable unless we know the and'd out bits are already zero.
+ if (CI->getZExtValue() > NumBits) {
+ unsigned LowBits = CI->getZExtValue() - NumBits;
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getLowBitsSet(TypeWidth, LowBits)))
+ return true;
+ }
+
+ return false;
+ }
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
+ CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC))
+ return false;
+ return true;
+ }
+ }
+}
+
+/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
+/// this value inserts the new computation that produces the shifted value.
+static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC) {
+ // We can always evaluate constants shifted.
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isLeftShift)
+ V = IC.Builder->CreateShl(C, NumBits);
+ else
+ V = IC.Builder->CreateLShr(C, NumBits);
+ // If we got a constantexpr back, try to simplify it with TD info.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ V = ConstantFoldConstantExpression(CE, IC.getTargetData());
+ return V;
+ }
+
+ Instruction *I = cast<Instruction>(V);
+ IC.Worklist.Add(I);
+
+ switch (I->getOpcode()) {
+ default: assert(0 && "Inconsistency with CanEvaluateShifted");
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted.
+ I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
+ I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ return I;
+
+ case Instruction::Shl: {
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) {
+ // If this is oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(I->getType());
+
+ I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ return I;
+ }
+
+ // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
+ // zeros.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(I->getOperand(0),
+ ConstantInt::get(I->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(I);
+ VI->takeName(I);
+ }
+ return V;
+ }
+
+ // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ I->setOperand(1, ConstantInt::get(I->getType(),
+ CI->getZExtValue() - NumBits));
+ return I;
+ }
+ case Instruction::LShr: {
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) {
+ // If this is oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(I->getType());
+
+ I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ return I;
+ }
+
+ // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
+ // zeros.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(I->getOperand(0),
+ ConstantInt::get(I->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(I);
+ VI->takeName(I);
+ }
+ return V;
+ }
+
+ // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ I->setOperand(1, ConstantInt::get(I->getType(),
+ CI->getZExtValue() - NumBits));
+ return I;
+ }
+
+ case Instruction::Select:
+ I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+ return I;
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
+ NumBits, isLeftShift, IC));
+ return PN;
+ }
+ }
+}
+
+
+
+Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+ BinaryOperator &I) {
+ bool isLeftShift = I.getOpcode() == Instruction::Shl;
+
+
+ // See if we can propagate this shift into the input, this covers the trivial
+ // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
+ if (I.getOpcode() != Instruction::AShr &&
+ CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
+ DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
+ " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
+
+ return ReplaceInstUsesWith(I,
+ GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
+ }
+
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
+
+ // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
+ // a signed shift.
+ //
+ if (Op1->uge(TypeBits)) {
+ if (I.getOpcode() != Instruction::AShr)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
+ // ashr i32 X, 32 --> ashr i32 X, 31
+ I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
+ return &I;
+ }
+
+ // ((X*C1) << C2) == (X * (C1 << C2))
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
+ if (BO->getOpcode() == Instruction::Mul && isLeftShift)
+ if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
+ return BinaryOperator::CreateMul(BO->getOperand(0),
+ ConstantExpr::getShl(BOOp, Op1));
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
+ if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
+ Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
+ // If 'shift2' is an ashr, we would have to get the sign bit into a funny
+ // place. Don't try to do this transformation in this case. Also, we
+ // require that the input operand is a shift-by-constant so that we have
+ // confidence that the shifts will get folded together. We could do this
+ // xform in more cases, but it is unlikely to be profitable.
+ if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
+ isa<ConstantInt>(TrOp->getOperand(1))) {
+ // Okay, we'll do this xform. Make the shift of shift.
+ Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
+ // (shift2 (shift1 & 0x00FF), c2)
+ Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
+
+ // For logical shifts, the truncation has the effect of making the high
+ // part of the register be zeros. Emulate this by inserting an AND to
+ // clear the top bits as needed. This 'and' will usually be zapped by
+ // other xforms later if dead.
+ unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
+ unsigned DstSize = TI->getType()->getScalarSizeInBits();
+ APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
+
+ // The mask we constructed says what the trunc would do if occurring
+ // between the shifts. We want to know the effect *after* the second
+ // shift. We know that it is a logical shift by a constant, so adjust the
+ // mask as appropriate.
+ if (I.getOpcode() == Instruction::Shl)
+ MaskV <<= Op1->getZExtValue();
+ else {
+ assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
+ MaskV = MaskV.lshr(Op1->getZExtValue());
+ }
+
+ // shift1 & 0x00FF
+ Value *And = Builder->CreateAnd(NSh,
+ ConstantInt::get(I.getContext(), MaskV),
+ TI->getName());
+
+ // Return the value truncated to the interesting size.
+ return new TruncInst(And, I.getType());
+ }
+ }
+
+ if (Op0->hasOneUse()) {
+ if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ Value *V1, *V2;
+ ConstantInt *CC;
+ switch (Op0BO->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // These operators commute.
+ // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
+ match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
+ Op0BO->getOperand(1)->getName());
+ uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
+ APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ }
+
+ // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
+ Value *Op0BOOp1 = Op0BO->getOperand(1);
+ if (isLeftShift && Op0BOOp1->hasOneUse() &&
+ match(Op0BOOp1,
+ m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
+ m_ConstantInt(CC))) &&
+ cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1,
+ Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+ return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
+ }
+ }
+
+ // FALL THROUGH.
+ case Instruction::Sub: {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
+ Op0BO->getOperand(0)->getName());
+ uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
+ APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ }
+
+ // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C)
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0),
+ m_And(m_Shr(m_Value(V1), m_Value(V2)),
+ m_ConstantInt(CC))) && V2 == Op1 &&
+ cast<BinaryOperator>(Op0BO->getOperand(0))
+ ->getOperand(0)->hasOneUse()) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
+ }
+
+ break;
+ }
+ }
+
+
+ // If the operand is an bitwise operator with a constant RHS, and the
+ // shift is the only use, we can pull it out of the shift.
+ if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
+ bool isValid = true; // Valid only for And, Or, Xor
+ bool highBitSet = false; // Transform if high bit of constant set?
+
+ switch (Op0BO->getOpcode()) {
+ default: isValid = false; break; // Do not perform transform!
+ case Instruction::Add:
+ isValid = isLeftShift;
+ break;
+ case Instruction::Or:
+ case Instruction::Xor:
+ highBitSet = false;
+ break;
+ case Instruction::And:
+ highBitSet = true;
+ break;
+ }
+
+ // If this is a signed shift right, and the high bit is modified
+ // by the logical operation, do not perform the transformation.
+ // The highBitSet boolean indicates the value of the high bit of
+ // the constant which would cause it to be modified for this
+ // operation.
+ //
+ if (isValid && I.getOpcode() == Instruction::AShr)
+ isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
+
+ if (isValid) {
+ Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
+
+ Value *NewShift =
+ Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
+ NewShift->takeName(Op0BO);
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
+ NewRHS);
+ }
+ }
+ }
+ }
+
+ // Find out if this is a shift of a shift by a constant.
+ BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
+ if (ShiftOp && !ShiftOp->isShift())
+ ShiftOp = 0;
+
+ if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
+ ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
+ uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
+ uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
+ assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
+ if (ShiftAmt1 == 0) return 0; // Will be simplified in the future.
+ Value *X = ShiftOp->getOperand(0);
+
+ uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
+
+ const IntegerType *Ty = cast<IntegerType>(I.getType());
+
+ // Check for (X << c1) << c2 and (X >> c1) >> c2
+ if (I.getOpcode() == ShiftOp->getOpcode()) {
+ // If this is oversized composite shift, then unsigned shifts get 0, ashr
+ // saturates.
+ if (AmtSum >= TypeBits) {
+ if (I.getOpcode() != Instruction::AShr)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr.
+ }
+
+ return BinaryOperator::Create(I.getOpcode(), X,
+ ConstantInt::get(Ty, AmtSum));
+ }
+
+ if (ShiftAmt1 == ShiftAmt2) {
+ // If we have ((X >>? C) << C), turn this into X & (-1 << C).
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
+ return BinaryOperator::CreateAnd(X,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+ // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
+ return BinaryOperator::CreateAnd(X,
+ ConstantInt::get(I.getContext(), Mask));
+ }
+ } else if (ShiftAmt1 < ShiftAmt2) {
+ uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
+
+ // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
+ assert(ShiftOp->getOpcode() == Instruction::LShr ||
+ ShiftOp->getOpcode() == Instruction::AShr);
+ Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ assert(ShiftOp->getOpcode() == Instruction::Shl);
+ Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
+ } else {
+ assert(ShiftAmt2 < ShiftAmt1);
+ uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
+
+ // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
+ Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
+ ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitShl(BinaryOperator &I) {
+ return commonShiftTransforms(I);
+}
+
+Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
+ if (Instruction *R = commonShiftTransforms(I))
+ return R;
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1))
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) {
+ unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+ // ctlz.i32(x)>>5 --> zext(x == 0)
+ // cttz.i32(x)>>5 --> zext(x == 0)
+ // ctpop.i32(x)>>5 --> zext(x == -1)
+ if ((II->getIntrinsicID() == Intrinsic::ctlz ||
+ II->getIntrinsicID() == Intrinsic::cttz ||
+ II->getIntrinsicID() == Intrinsic::ctpop) &&
+ isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){
+ bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop;
+ Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0);
+ Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
+ return new ZExtInst(Cmp, II->getType());
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
+ if (Instruction *R = commonShiftTransforms(I))
+ return R;
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) {
+ // ashr int -1, X = -1 (for any arithmetic shift rights of ~0)
+ if (CSI->isAllOnesValue())
+ return ReplaceInstUsesWith(I, CSI);
+ }
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ // If the input is a SHL by the same constant (ashr (shl X, C), C), then we
+ // have a sign-extend idiom.
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) {
+ // If the input value is known to already be sign extended enough, delete
+ // the extension.
+ if (ComputeNumSignBits(X) > Op1C->getZExtValue())
+ return ReplaceInstUsesWith(I, X);
+
+ // If the input is an extension from the shifted amount value, e.g.
+ // %x = zext i8 %A to i32
+ // %y = shl i32 %x, 24
+ // %z = ashr %y, 24
+ // then turn this into "z = sext i8 A to i32".
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(X)) {
+ uint32_t SrcBits = ZI->getOperand(0)->getType()->getScalarSizeInBits();
+ uint32_t DestBits = ZI->getType()->getScalarSizeInBits();
+ if (Op1C->getZExtValue() == DestBits-SrcBits)
+ return new SExtInst(ZI->getOperand(0), ZI->getType());
+ }
+ }
+ }
+
+ // See if we can turn a signed shr into an unsigned shr.
+ if (MaskedValueIsZero(Op0,
+ APInt::getSignBit(I.getType()->getScalarSizeInBits())))
+ return BinaryOperator::CreateLShr(Op0, Op1);
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ unsigned NumSignBits = ComputeNumSignBits(Op0);
+ if (NumSignBits == Op0->getType()->getScalarSizeInBits())
+ return ReplaceInstUsesWith(I, Op0);
+
+ return 0;
+}
+
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
new file mode 100644
index 0000000..adf7a76
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -0,0 +1,1113 @@
+//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains logic for simplifying instructions based on information
+// about how they are used.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "InstCombine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/IntrinsicInst.h"
+
+using namespace llvm;
+
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
+static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
+ APInt Demanded) {
+ assert(I && "No instruction?");
+ assert(OpNo < I->getNumOperands() && "Operand index too large");
+
+ // If the operand is not a constant integer, nothing to do.
+ ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
+ if (!OpC) return false;
+
+ // If there are no bits set that aren't demanded, nothing to do.
+ Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
+ if ((~Demanded & OpC->getValue()) == 0)
+ return false;
+
+ // This instruction is producing bits that are not demanded. Shrink the RHS.
+ Demanded &= OpC->getValue();
+ I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
+ return true;
+}
+
+
+
+/// SimplifyDemandedInstructionBits - Inst is an integer instruction that
+/// SimplifyDemandedBits knows about. See if the instruction has any
+/// properties that allow us to simplify its operands.
+bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
+ unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
+
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
+ KnownZero, KnownOne, 0);
+ if (V == 0) return false;
+ if (V == &Inst) return true;
+ ReplaceInstUsesWith(Inst, V);
+ return true;
+}
+
+/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
+/// specified instruction operand if possible, updating it in place. It returns
+/// true if it made any change and false otherwise.
+bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) {
+ Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
+ KnownZero, KnownOne, Depth);
+ if (NewVal == 0) return false;
+ U = NewVal;
+ return true;
+}
+
+
+/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
+/// value based on the demanded bits. When this function is called, it is known
+/// that only the bits set in DemandedMask of the result of V are ever used
+/// downstream. Consequently, depending on the mask and V, it may be possible
+/// to replace V with a constant or one of its operands. In such cases, this
+/// function does the replacement and returns true. In all other cases, it
+/// returns false after analyzing the expression and setting KnownOne and known
+/// to be one in the expression. KnownZero contains all the bits that are known
+/// to be zero in the expression. These are provided to potentially allow the
+/// caller (which might recursively be SimplifyDemandedBits itself) to simplify
+/// the expression. KnownOne and KnownZero always follow the invariant that
+/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
+/// the bits in KnownOne and KnownZero may only be accurate for those bits set
+/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
+/// and KnownOne must all be the same.
+///
+/// This returns null if it did not change anything and it permits no
+/// simplification. This returns V itself if it did some simplification of V's
+/// operands based on the information about what bits are demanded. This returns
+/// some other non-null value if it found out that V is equal to another value
+/// in the context where the specified bits are demanded, but not for all users.
+Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) {
+ assert(V != 0 && "Null pointer of Value???");
+ assert(Depth <= 6 && "Limit Search Depth");
+ uint32_t BitWidth = DemandedMask.getBitWidth();
+ const Type *VTy = V->getType();
+ assert((TD || !VTy->isPointerTy()) &&
+ "SimplifyDemandedBits needs to know bit widths!");
+ assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
+ (!VTy->isIntOrIntVectorTy() ||
+ VTy->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue() & DemandedMask;
+ KnownZero = ~KnownOne & DemandedMask;
+ return 0;
+ }
+ if (isa<ConstantPointerNull>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne.clear();
+ KnownZero = DemandedMask;
+ return 0;
+ }
+
+ KnownZero.clear();
+ KnownOne.clear();
+ if (DemandedMask == 0) { // Not demanding any bits from V.
+ if (isa<UndefValue>(V))
+ return 0;
+ return UndefValue::get(VTy);
+ }
+
+ if (Depth == 6) // Limit search depth.
+ return 0;
+
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ return 0; // Only analyze instructions.
+ }
+
+ // If there are multiple uses of this value and we aren't at the root, then
+ // we can't do any simplifications of the operands, because DemandedMask
+ // only reflects the bits demanded by *one* of the users.
+ if (Depth != 0 && !I->hasOneUse()) {
+ // Despite the fact that we can't simplify this instruction in all User's
+ // context, we can at least compute the knownzero/knownone bits, and we can
+ // do simplifications that apply to *just* the one user if we know that
+ // this instruction has a simpler value in that context.
+ if (I->getOpcode() == Instruction::And) {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(I->getOperand(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero,
+ LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ } else if (I->getOpcode() == Instruction::Or) {
+ // We can simplify (X|Y) -> X or Y in the user's context if we know that
+ // only bits from X or Y are demanded.
+
+ // If either the LHS or the RHS are One, the result is One.
+ ComputeMaskedBits(I->getOperand(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne,
+ LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+ }
+
+ // Compute the KnownZero/KnownOne bits to simplify things downstream.
+ ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ return 0;
+ }
+
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the DemandedMask to all bits so that we can try to simplify the
+ // operands. This allows visitTruncInst (for example) to simplify the
+ // operand of a trunc without duplicating all the logic below.
+ if (Depth == 0 && !V->hasOneUse())
+ DemandedMask = APInt::getAllOnesValue(BitWidth);
+
+ switch (I->getOpcode()) {
+ default:
+ ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ break;
+ case Instruction::And:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
+ return I;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne = RHSKnownOne & LHSKnownOne;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero = RHSKnownZero | LHSKnownZero;
+ break;
+ case Instruction::Or:
+ // If either the LHS or the RHS are One, the result is One.
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero = RHSKnownZero & LHSKnownZero;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne = RHSKnownOne | LHSKnownOne;
+ break;
+ case Instruction::Xor: {
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ return I->getOperand(0);
+ if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ return I->getOperand(1);
+
+ // If all of the demanded bits are known to be zero on one side or the
+ // other, turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
+ Instruction *Or =
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+ I->getName());
+ return InsertNewInstBefore(Or, *I);
+ }
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
+ // all known
+ if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
+ Constant *AndC = Constant::getIntegerValue(VTy,
+ ~RHSKnownOne & DemandedMask);
+ Instruction *And =
+ BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+ return InsertNewInstBefore(And, *I);
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+ // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+
+ // If our LHS is an 'and' and if it has one use, and if any of the bits we
+ // are flipping are known to be set, then the xor is just resetting those
+ // bits to zero. We can just knock out bits from the 'and' and the 'xor',
+ // simplifying both of them.
+ if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
+ if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ isa<ConstantInt>(LHSInst->getOperand(1)) &&
+ (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
+ ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
+ ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
+ APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
+
+ Constant *AndC =
+ ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
+ Instruction *NewAnd =
+ BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+ InsertNewInstBefore(NewAnd, *I);
+
+ Constant *XorC =
+ ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
+ Instruction *NewXor =
+ BinaryOperator::CreateXor(NewAnd, XorC, "tmp");
+ return InsertNewInstBefore(NewXor, *I);
+ }
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
+ break;
+ }
+ case Instruction::Select:
+ if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
+ ShrinkDemandedConstant(I, 2, DemandedMask))
+ return I;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne = RHSKnownOne & LHSKnownOne;
+ KnownZero = RHSKnownZero & LHSKnownZero;
+ break;
+ case Instruction::Trunc: {
+ unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
+ DemandedMask.zext(truncBf);
+ KnownZero.zext(truncBf);
+ KnownOne.zext(truncBf);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ DemandedMask.trunc(BitWidth);
+ KnownZero.trunc(BitWidth);
+ KnownOne.trunc(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ break;
+ }
+ case Instruction::BitCast:
+ if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
+ return 0; // vector->int or fp->int?
+
+ if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
+ if (const VectorType *SrcVTy =
+ dyn_cast<VectorType>(I->getOperand(0)->getType())) {
+ if (DstVTy->getNumElements() != SrcVTy->getNumElements())
+ // Don't touch a bitcast between vectors of different element counts.
+ return 0;
+ } else
+ // Don't touch a scalar-to-vector bitcast.
+ return 0;
+ } else if (I->getOperand(0)->getType()->isVectorTy())
+ // Don't touch a vector-to-scalar bitcast.
+ return 0;
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ break;
+ case Instruction::ZExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ DemandedMask.trunc(SrcBitWidth);
+ KnownZero.trunc(SrcBitWidth);
+ KnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ DemandedMask.zext(BitWidth);
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ // The top bits are known to be zero.
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ break;
+ }
+ case Instruction::SExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ APInt InputDemandedBits = DemandedMask &
+ APInt::getLowBitsSet(BitWidth, SrcBitWidth);
+
+ APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if ((NewBits & DemandedMask) != 0)
+ InputDemandedBits.set(SrcBitWidth-1);
+
+ InputDemandedBits.trunc(SrcBitWidth);
+ KnownZero.trunc(SrcBitWidth);
+ KnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ InputDemandedBits.zext(BitWidth);
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, or if the NewBits are not demanded
+ // convert this into a zero extension.
+ if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
+ // Convert to ZExt cast
+ CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
+ return InsertNewInstBefore(NewCast, *I);
+ } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set
+ KnownOne |= NewBits;
+ }
+ break;
+ }
+ case Instruction::Add: {
+ // Figure out what the input bits are. If the top bits of the and result
+ // are not demanded, then the add doesn't demand them from its input
+ // either.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+
+ // If there is a constant on the RHS, there are a variety of xformations
+ // we can do.
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // If null, this should be simplified elsewhere. Some of the xforms here
+ // won't work if the RHS is zero.
+ if (RHS->isZero())
+ break;
+
+ // If the top bit of the output is demanded, demand everything from the
+ // input. Otherwise, we demand all the input bits except NLZ top bits.
+ APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
+
+ // Find information about known zero/one bits in the input.
+ if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+
+ // If the RHS of the add has bits set that can't affect the input, reduce
+ // the constant.
+ if (ShrinkDemandedConstant(I, 1, InDemandedBits))
+ return I;
+
+ // Avoid excess work.
+ if (LHSKnownZero == 0 && LHSKnownOne == 0)
+ break;
+
+ // Turn it into OR if input bits are zero.
+ if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
+ Instruction *Or =
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+ I->getName());
+ return InsertNewInstBefore(Or, *I);
+ }
+
+ // We can say something about the output known-zero and known-one bits,
+ // depending on potential carries from the input constant and the
+ // unknowns. For example if the LHS is known to have at most the 0x0F0F0
+ // bits set and the RHS constant is 0x01001, then we know we have a known
+ // one mask of 0x00001 and a known zero mask of 0xE0F0E.
+
+ // To compute this, we first compute the potential carry bits. These are
+ // the bits which may be modified. I'm not aware of a better way to do
+ // this scan.
+ const APInt &RHSVal = RHS->getValue();
+ APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
+
+ // Now that we know which bits have carries, compute the known-1/0 sets.
+
+ // Bits are known one if they are known zero in one operand and one in the
+ // other, and there is no input carry.
+ KnownOne = ((LHSKnownZero & RHSVal) |
+ (LHSKnownOne & ~RHSVal)) & ~CarryBits;
+
+ // Bits are known zero if they are known zero in both operands and there
+ // is no input carry.
+ KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
+ } else {
+ // If the high-bits of this ADD are not demanded, then it does not demand
+ // the high bits of its LHS or RHS.
+ if (DemandedMask[BitWidth-1] == 0) {
+ // Right fill the mask of bits for this ADD to demand the most
+ // significant bit and all those below it.
+ APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ }
+ }
+ break;
+ }
+ case Instruction::Sub:
+ // If the high-bits of this SUB are not demanded, then it does not demand
+ // the high bits of its LHS or RHS.
+ if (DemandedMask[BitWidth-1] == 0) {
+ // Right fill the mask of bits for this SUB to demand the most
+ // significant bit and all those below it.
+ uint32_t NLZ = DemandedMask.countLeadingZeros();
+ APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ }
+ // Otherwise just hand the sub off to ComputeMaskedBits to fill in
+ // the known zeros and ones.
+ ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ break;
+ case Instruction::Shl:
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ KnownZero <<= ShiftAmt;
+ KnownOne <<= ShiftAmt;
+ // low bits known zero.
+ if (ShiftAmt)
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::LShr:
+ // For a logical shift right
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+ // Unsigned shift right.
+ APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+ if (ShiftAmt) {
+ // Compute the new bits that are at the top now.
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ KnownZero |= HighBits; // high bits known zero.
+ }
+ }
+ break;
+ case Instruction::AShr:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (DemandedMask == 1) {
+ // Perform the logical shift right.
+ Instruction *NewVal = BinaryOperator::CreateLShr(
+ I->getOperand(0), I->getOperand(1), I->getName());
+ return InsertNewInstBefore(NewVal, *I);
+ }
+
+ // If the sign bit is the only bit demanded by this ashr, then there is no
+ // need to do it, the shift doesn't change the high bit.
+ if (DemandedMask.isSignBit())
+ return I->getOperand(0);
+
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+ // Signed shift right.
+ APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+ // If any of the "high bits" are demanded, we should set the sign bit as
+ // demanded.
+ if (DemandedMask.countLeadingZeros() <= ShiftAmt)
+ DemandedMaskIn.set(BitWidth-1);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ KnownZero, KnownOne, Depth+1))
+ return I;
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ // Compute the new bits that are at the top now.
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+
+ // Handle the sign bits.
+ APInt SignBit(APInt::getSignBit(BitWidth));
+ // Adjust to where it is now in the mask.
+ SignBit = APIntOps::lshr(SignBit, ShiftAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
+ (HighBits & ~DemandedMask) == HighBits) {
+ // Perform the logical shift right.
+ Instruction *NewVal = BinaryOperator::CreateLShr(
+ I->getOperand(0), SA, I->getName());
+ return InsertNewInstBefore(NewVal, *I);
+ } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
+ KnownOne |= HighBits;
+ }
+ }
+ break;
+ case Instruction::SRem:
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue().abs();
+ if (RA.isPowerOf2()) {
+ if (DemandedMask.ult(RA)) // srem won't affect demanded bits
+ return I->getOperand(0);
+
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+
+ // The low bits of LHS are unchanged by the srem.
+ KnownZero = LHSKnownZero & LowBits;
+ KnownOne = LHSKnownOne & LowBits;
+
+ // If LHS is non-negative or has all low bits zero, then the upper bits
+ // are all zero.
+ if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If LHS is negative and not all low bits are zero, then the upper bits
+ // are all one.
+ if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
+ KnownOne |= ~LowBits;
+
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ }
+ }
+ break;
+ case Instruction::URem: {
+ APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
+ KnownZero2, KnownOne2, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
+ KnownZero2, KnownOne2, Depth+1))
+ return I;
+
+ unsigned Leaders = KnownZero2.countLeadingOnes();
+ Leaders = std::max(Leaders,
+ KnownZero2.countLeadingOnes());
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+ break;
+ }
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap: {
+ // If the only bits demanded come from one byte of the bswap result,
+ // just shift the input byte into position to eliminate the bswap.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NTZ = DemandedMask.countTrailingZeros();
+
+ // Round NTZ down to the next byte. If we have 11 trailing zeros, then
+ // we need all the bits down to bit 8. Likewise, round NLZ. If we
+ // have 14 leading zeros, round to 8.
+ NLZ &= ~7;
+ NTZ &= ~7;
+ // If we need exactly one byte, we can do this transformation.
+ if (BitWidth-NLZ-NTZ == 8) {
+ unsigned ResultBit = NTZ;
+ unsigned InputBit = BitWidth-NTZ-8;
+
+ // Replace this with either a left or right shift to get the byte into
+ // the right place.
+ Instruction *NewVal;
+ if (InputBit > ResultBit)
+ NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
+ ConstantInt::get(I->getType(), InputBit-ResultBit));
+ else
+ NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
+ ConstantInt::get(I->getType(), ResultBit-InputBit));
+ NewVal->takeName(I);
+ return InsertNewInstBefore(NewVal, *I);
+ }
+
+ // TODO: Could compute known zero/one bits based on the input.
+ break;
+ }
+ }
+ }
+ ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ break;
+ }
+
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(VTy, KnownOne);
+ return 0;
+}
+
+
+/// SimplifyDemandedVectorElts - The specified value produces a vector with
+/// any number of elements. DemandedElts contains the set of elements that are
+/// actually used by the caller. This method analyzes which elements of the
+/// operand are undef and returns that information in UndefElts.
+///
+/// If the information about demanded elements can be used to simplify the
+/// operation, the operation is simplified, then the resultant value is
+/// returned. This returns null if no change was made.
+Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt &UndefElts,
+ unsigned Depth) {
+ unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
+ APInt EltMask(APInt::getAllOnesValue(VWidth));
+ assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
+
+ if (isa<UndefValue>(V)) {
+ // If the entire vector is undefined, just return this info.
+ UndefElts = EltMask;
+ return 0;
+ }
+
+ if (DemandedElts == 0) { // If nothing is demanded, provide undef.
+ UndefElts = EltMask;
+ return UndefValue::get(V->getType());
+ }
+
+ UndefElts = 0;
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+ const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Constant *Undef = UndefValue::get(EltTy);
+
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != VWidth; ++i)
+ if (!DemandedElts[i]) { // If not demanded, set to undef.
+ Elts.push_back(Undef);
+ UndefElts.set(i);
+ } else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef.
+ Elts.push_back(Undef);
+ UndefElts.set(i);
+ } else { // Otherwise, defined.
+ Elts.push_back(CV->getOperand(i));
+ }
+
+ // If we changed the constant, return it.
+ Constant *NewCP = ConstantVector::get(Elts);
+ return NewCP != CV ? NewCP : 0;
+ }
+
+ if (isa<ConstantAggregateZero>(V)) {
+ // Simplify the CAZ to a ConstantVector where the non-demanded elements are
+ // set to undef.
+
+ // Check if this is identity. If so, return 0 since we are not simplifying
+ // anything.
+ if (DemandedElts.isAllOnesValue())
+ return 0;
+
+ const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Constant *Zero = Constant::getNullValue(EltTy);
+ Constant *Undef = UndefValue::get(EltTy);
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Constant *Elt = DemandedElts[i] ? Zero : Undef;
+ Elts.push_back(Elt);
+ }
+ UndefElts = DemandedElts ^ EltMask;
+ return ConstantVector::get(Elts);
+ }
+
+ // Limit search depth.
+ if (Depth == 10)
+ return 0;
+
+ // If multiple users are using the root value, procede with
+ // simplification conservatively assuming that all elements
+ // are needed.
+ if (!V->hasOneUse()) {
+ // Quit if we find multiple users of a non-root value though.
+ // They'll be handled when it's their turn to be visited by
+ // the main instcombine process.
+ if (Depth != 0)
+ // TODO: Just compute the UndefElts information recursively.
+ return 0;
+
+ // Conservatively assume that all elements are needed.
+ DemandedElts = EltMask;
+ }
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return 0; // Only analyze instructions.
+
+ bool MadeChange = false;
+ APInt UndefElts2(VWidth, 0);
+ Value *TmpV;
+ switch (I->getOpcode()) {
+ default: break;
+
+ case Instruction::InsertElement: {
+ // If this is a variable index, we don't know which element it overwrites.
+ // demand exactly the same input as we produce.
+ ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
+ if (Idx == 0) {
+ // Note that we can't propagate undef elt info, because we don't know
+ // which elt is getting updated.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ break;
+ }
+
+ // If this is inserting an element that isn't demanded, remove this
+ // insertelement.
+ unsigned IdxNo = Idx->getZExtValue();
+ if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
+ Worklist.Add(I);
+ return I->getOperand(0);
+ }
+
+ // Otherwise, the element inserted overwrites whatever was there, so the
+ // input demanded set is simpler than the output set.
+ APInt DemandedElts2 = DemandedElts;
+ DemandedElts2.clear(IdxNo);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ // The inserted element is defined.
+ UndefElts.clear(IdxNo);
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+ uint64_t LHSVWidth =
+ cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
+ APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
+ for (unsigned i = 0; i < VWidth; i++) {
+ if (DemandedElts[i]) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal != -1u) {
+ assert(MaskVal < LHSVWidth * 2 &&
+ "shufflevector mask index out of range!");
+ if (MaskVal < LHSVWidth)
+ LeftDemanded.set(MaskVal);
+ else
+ RightDemanded.set(MaskVal - LHSVWidth);
+ }
+ }
+ }
+
+ APInt UndefElts4(LHSVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
+ UndefElts4, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ APInt UndefElts3(LHSVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
+ UndefElts3, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ bool NewUndefElts = false;
+ for (unsigned i = 0; i < VWidth; i++) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal == -1u) {
+ UndefElts.set(i);
+ } else if (MaskVal < LHSVWidth) {
+ if (UndefElts4[MaskVal]) {
+ NewUndefElts = true;
+ UndefElts.set(i);
+ }
+ } else {
+ if (UndefElts3[MaskVal - LHSVWidth]) {
+ NewUndefElts = true;
+ UndefElts.set(i);
+ }
+ }
+ }
+
+ if (NewUndefElts) {
+ // Add additional discovered undefs.
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i < VWidth; ++i) {
+ if (UndefElts[i])
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
+ else
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Shuffle->getMaskValue(i)));
+ }
+ I->setOperand(2, ConstantVector::get(Elts));
+ MadeChange = true;
+ }
+ break;
+ }
+ case Instruction::BitCast: {
+ // Vector->vector casts only.
+ const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+ if (!VTy) break;
+ unsigned InVWidth = VTy->getNumElements();
+ APInt InputDemandedElts(InVWidth, 0);
+ unsigned Ratio;
+
+ if (VWidth == InVWidth) {
+ // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
+ // elements as are demanded of us.
+ Ratio = 1;
+ InputDemandedElts = DemandedElts;
+ } else if (VWidth > InVWidth) {
+ // Untested so far.
+ break;
+
+ // If there are more elements in the result than there are in the source,
+ // then an input element is live if any of the corresponding output
+ // elements are live.
+ Ratio = VWidth/InVWidth;
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+ if (DemandedElts[OutIdx])
+ InputDemandedElts.set(OutIdx/Ratio);
+ }
+ } else {
+ // Untested so far.
+ break;
+
+ // If there are more elements in the source than there are in the result,
+ // then an input element is live if the corresponding output element is
+ // live.
+ Ratio = InVWidth/VWidth;
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if (DemandedElts[InIdx/Ratio])
+ InputDemandedElts.set(InIdx);
+ }
+
+ // div/rem demand all inputs, because they don't want divide by zero.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) {
+ I->setOperand(0, TmpV);
+ MadeChange = true;
+ }
+
+ UndefElts = UndefElts2;
+ if (VWidth > InVWidth) {
+ llvm_unreachable("Unimp");
+ // If there are more elements in the result than there are in the source,
+ // then an output element is undef if the corresponding input element is
+ // undef.
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+ if (UndefElts2[OutIdx/Ratio])
+ UndefElts.set(OutIdx);
+ } else if (VWidth < InVWidth) {
+ llvm_unreachable("Unimp");
+ // If there are more elements in the source than there are in the result,
+ // then a result element is undef if all of the corresponding input
+ // elements are undef.
+ UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if (!UndefElts2[InIdx]) // Not undef?
+ UndefElts.clear(InIdx/Ratio); // Clear undef bit.
+ }
+ break;
+ }
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // div/rem demand all inputs, because they don't want divide by zero.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ break;
+
+ case Instruction::Call: {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+ if (!II) break;
+ switch (II->getIntrinsicID()) {
+ default: break;
+
+ // Binary vector operations that work column-wise. A dest element is a
+ // function of the corresponding input elements from the two inputs.
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse_min_ss:
+ case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ case Intrinsic::x86_sse2_min_sd:
+ case Intrinsic::x86_sse2_max_sd:
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+
+ // If only the low elt is demanded and this is a scalarizable intrinsic,
+ // scalarize it now.
+ if (DemandedElts == 1) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ // TODO: Lower MIN/MAX/ABS/etc
+ Value *LHS = II->getArgOperand(0);
+ Value *RHS = II->getArgOperand(1);
+ // Extract the element as scalars.
+ LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
+ RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
+
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Case stmts out of sync!");
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS,
+ II->getName()), *II);
+ break;
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse2_mul_sd:
+ TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS,
+ II->getName()), *II);
+ break;
+ }
+
+ Instruction *New =
+ InsertElementInst::Create(
+ UndefValue::get(II->getType()), TmpV,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
+ II->getName());
+ InsertNewInstBefore(New, *II);
+ return New;
+ }
+ }
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ break;
+ }
+ break;
+ }
+ }
+ return MadeChange ? I : 0;
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
new file mode 100644
index 0000000..a58124d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -0,0 +1,561 @@
+//===- InstCombineVectorOps.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements instcombine for ExtractElement, InsertElement and
+// ShuffleVector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+using namespace llvm;
+
+/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
+/// is to leave as a vector operation.
+static bool CheapToScalarize(Value *V, bool isConstant) {
+ if (isa<ConstantAggregateZero>(V))
+ return true;
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
+ if (isConstant) return true;
+ // If all elts are the same, we can extract.
+ Constant *Op0 = C->getOperand(0);
+ for (unsigned i = 1; i < C->getNumOperands(); ++i)
+ if (C->getOperand(i) != Op0)
+ return false;
+ return true;
+ }
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Insert element gets simplified to the inserted element or is deleted if
+ // this is constant idx extract element and its a constant idx insertelt.
+ if (I->getOpcode() == Instruction::InsertElement && isConstant &&
+ isa<ConstantInt>(I->getOperand(2)))
+ return true;
+ if (I->getOpcode() == Instruction::Load && I->hasOneUse())
+ return true;
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
+ if (BO->hasOneUse() &&
+ (CheapToScalarize(BO->getOperand(0), isConstant) ||
+ CheapToScalarize(BO->getOperand(1), isConstant)))
+ return true;
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CI->hasOneUse() &&
+ (CheapToScalarize(CI->getOperand(0), isConstant) ||
+ CheapToScalarize(CI->getOperand(1), isConstant)))
+ return true;
+
+ return false;
+}
+
+/// Read and decode a shufflevector mask.
+///
+/// It turns undef elements into values that are larger than the number of
+/// elements in the input.
+static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
+ unsigned NElts = SVI->getType()->getNumElements();
+ if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
+ return std::vector<unsigned>(NElts, 0);
+ if (isa<UndefValue>(SVI->getOperand(2)))
+ return std::vector<unsigned>(NElts, 2*NElts);
+
+ std::vector<unsigned> Result;
+ const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
+ for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
+ if (isa<UndefValue>(*i))
+ Result.push_back(NElts*2); // undef -> 8
+ else
+ Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
+ return Result;
+}
+
+/// FindScalarElement - Given a vector and an element number, see if the scalar
+/// value is already around as a register, for example if it were inserted then
+/// extracted from the vector.
+static Value *FindScalarElement(Value *V, unsigned EltNo) {
+ assert(V->getType()->isVectorTy() && "Not looking at a vector?");
+ const VectorType *PTy = cast<VectorType>(V->getType());
+ unsigned Width = PTy->getNumElements();
+ if (EltNo >= Width) // Out of range access.
+ return UndefValue::get(PTy->getElementType());
+
+ if (isa<UndefValue>(V))
+ return UndefValue::get(PTy->getElementType());
+ if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(PTy->getElementType());
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
+ return CP->getOperand(EltNo);
+
+ if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert to a variable element, we don't know what it is.
+ if (!isa<ConstantInt>(III->getOperand(2)))
+ return 0;
+ unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
+
+ // If this is an insert to the element we are looking for, return the
+ // inserted value.
+ if (EltNo == IIElt)
+ return III->getOperand(1);
+
+ // Otherwise, the insertelement doesn't modify the value, recurse on its
+ // vector input.
+ return FindScalarElement(III->getOperand(0), EltNo);
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+ unsigned LHSWidth =
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+ unsigned InEl = getShuffleMask(SVI)[EltNo];
+ if (InEl < LHSWidth)
+ return FindScalarElement(SVI->getOperand(0), InEl);
+ else if (InEl < LHSWidth*2)
+ return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
+ else
+ return UndefValue::get(PTy->getElementType());
+ }
+
+ // Otherwise, we don't know.
+ return 0;
+}
+
+Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
+ // If vector val is undef, replace extract with scalar undef.
+ if (isa<UndefValue>(EI.getOperand(0)))
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+ // If vector val is constant 0, replace extract with scalar 0.
+ if (isa<ConstantAggregateZero>(EI.getOperand(0)))
+ return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
+ // If vector val is constant with all elements the same, replace EI with
+ // that element. When the elements are not identical, we cannot replace yet
+ // (we do that below, but only when the index is constant).
+ Constant *op0 = C->getOperand(0);
+ for (unsigned i = 1; i != C->getNumOperands(); ++i)
+ if (C->getOperand(i) != op0) {
+ op0 = 0;
+ break;
+ }
+ if (op0)
+ return ReplaceInstUsesWith(EI, op0);
+ }
+
+ // If extracting a specified index from the vector, see if we can recursively
+ // find a previously computed scalar that was inserted into the vector.
+ if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ unsigned IndexVal = IdxC->getZExtValue();
+ unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
+
+ // If this is extracting an invalid index, turn this into undef, to avoid
+ // crashing the code below.
+ if (IndexVal >= VectorWidth)
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+ // This instruction only demands the single element from the input vector.
+ // If the input vector has a single use, simplify it based on this use
+ // property.
+ if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
+ APInt UndefElts(VectorWidth, 0);
+ APInt DemandedMask(VectorWidth, 0);
+ DemandedMask.set(IndexVal);
+ if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
+ DemandedMask, UndefElts)) {
+ EI.setOperand(0, V);
+ return &EI;
+ }
+ }
+
+ if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
+ return ReplaceInstUsesWith(EI, Elt);
+
+ // If the this extractelement is directly using a bitcast from a vector of
+ // the same number of elements, see if we can find the source element from
+ // it. In this case, we will end up needing to bitcast the scalars.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
+ if (const VectorType *VT =
+ dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+ if (VT->getNumElements() == VectorWidth)
+ if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+ return new BitCastInst(Elt, EI.getType());
+ }
+ }
+
+ if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
+ // Push extractelement into predecessor operation if legal and
+ // profitable to do so
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (I->hasOneUse() &&
+ CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+ Value *newEI0 =
+ Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+ EI.getName()+".lhs");
+ Value *newEI1 =
+ Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+ EI.getName()+".rhs");
+ return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
+ }
+ } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+ // Extracting the inserted element?
+ if (IE->getOperand(2) == EI.getOperand(1))
+ return ReplaceInstUsesWith(EI, IE->getOperand(1));
+ // If the inserted and extracted elements are constants, they must not
+ // be the same value, extract from the pre-inserted value instead.
+ if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
+ Worklist.AddValue(EI.getOperand(0));
+ EI.setOperand(0, IE->getOperand(0));
+ return &EI;
+ }
+ } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
+ // If this is extracting an element from a shufflevector, figure out where
+ // it came from and extract from the appropriate input element instead.
+ if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
+ Value *Src;
+ unsigned LHSWidth =
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+
+ if (SrcIdx < LHSWidth)
+ Src = SVI->getOperand(0);
+ else if (SrcIdx < LHSWidth*2) {
+ SrcIdx -= LHSWidth;
+ Src = SVI->getOperand(1);
+ } else {
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+ }
+ return ExtractElementInst::Create(Src,
+ ConstantInt::get(Type::getInt32Ty(EI.getContext()),
+ SrcIdx, false));
+ }
+ }
+ // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement)
+ }
+ return 0;
+}
+
+/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
+/// elements from either LHS or RHS, return the shuffle mask and true.
+/// Otherwise, return false.
+static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+ std::vector<Constant*> &Mask) {
+ assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
+ "Invalid CollectSingleShuffleElements");
+ unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+ return true;
+ }
+
+ if (V == LHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+ return true;
+ }
+
+ if (V == RHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ i+NumElts));
+ return true;
+ }
+
+ if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (!isa<ConstantInt>(IdxOp))
+ return false;
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
+ // Okay, we can handle this if the vector we are insertinting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted undef.
+ Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
+ return true;
+ }
+ } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
+ if (isa<ConstantInt>(EI->getOperand(1)) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+
+ // This must be extracting from either LHS or RHS.
+ if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
+ // Okay, we can handle this if the vector we are insertinting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted value.
+ if (EI->getOperand(0) == LHS) {
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ ExtractedIdx);
+ } else {
+ assert(EI->getOperand(0) == RHS);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ ExtractedIdx+NumElts);
+
+ }
+ return true;
+ }
+ }
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ return false;
+}
+
+/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
+/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask
+/// that computes V and the LHS value of the shuffle.
+static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
+ Value *&RHS) {
+ assert(V->getType()->isVectorTy() &&
+ (RHS == 0 || V->getType() == RHS->getType()) &&
+ "Invalid shuffle!");
+ unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+ return V;
+ } else if (isa<ConstantAggregateZero>(V)) {
+ Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
+ return V;
+ } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ // Either the extracted from or inserted into vector must be RHSVec,
+ // otherwise we'd end up with a shuffle of three inputs.
+ if (EI->getOperand(0) == RHS || RHS == 0) {
+ RHS = EI->getOperand(0);
+ Value *V = CollectShuffleElements(VecOp, Mask, RHS);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ NumElts+ExtractedIdx);
+ return V;
+ }
+
+ if (VecOp == RHS) {
+ Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+ // Everything but the extracted element is replaced with the RHS.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (i != InsertedIdx)
+ Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ NumElts+i);
+ }
+ return V;
+ }
+
+ // If this insertelement is a chain that comes from exactly these two
+ // vectors, return the vector and the effective shuffle.
+ if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
+ return EI->getOperand(0);
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ // Otherwise, can't do anything fancy. Return an identity vector.
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+ return V;
+}
+
+Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
+ Value *VecOp = IE.getOperand(0);
+ Value *ScalarOp = IE.getOperand(1);
+ Value *IdxOp = IE.getOperand(2);
+
+ // Inserting an undef or into an undefined place, remove this.
+ if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
+ ReplaceInstUsesWith(IE, VecOp);
+
+ // If the inserted element was extracted from some other vector, and if the
+ // indexes are constant, try to turn this into a shufflevector operation.
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+ EI->getOperand(0)->getType() == IE.getType()) {
+ unsigned NumVectorElts = IE.getType()->getNumElements();
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ if (ExtractedIdx >= NumVectorElts) // Out of range extract.
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ if (InsertedIdx >= NumVectorElts) // Out of range insert.
+ return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
+
+ // If we are extracting a value from a vector, then inserting it right
+ // back into the same place, just use the input vector.
+ if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ // If this insertelement isn't used by some other insertelement, turn it
+ // (and any insertelements it points to), into one big shuffle.
+ if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
+ std::vector<Constant*> Mask;
+ Value *RHS = 0;
+ Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
+ if (RHS == 0) RHS = UndefValue::get(LHS->getType());
+ // We now have a shuffle of LHS, RHS, Mask.
+ return new ShuffleVectorInst(LHS, RHS,
+ ConstantVector::get(Mask));
+ }
+ }
+ }
+
+ unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts))
+ return &IE;
+
+ return 0;
+}
+
+
+Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ Value *LHS = SVI.getOperand(0);
+ Value *RHS = SVI.getOperand(1);
+ std::vector<unsigned> Mask = getShuffleMask(&SVI);
+
+ bool MadeChange = false;
+
+ // Undefined shuffle mask -> undefined value.
+ if (isa<UndefValue>(SVI.getOperand(2)))
+ return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
+
+ unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
+
+ if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
+ return 0;
+
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
+ }
+
+ // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
+ // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
+ if (LHS == RHS || isa<UndefValue>(LHS)) {
+ if (isa<UndefValue>(LHS) && LHS == RHS) {
+ // shuffle(undef,undef,mask) -> undef.
+ return ReplaceInstUsesWith(SVI, LHS);
+ }
+
+ // Remap any references to RHS to use LHS.
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] >= 2*e)
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ else {
+ if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
+ (Mask[i] < e && isa<UndefValue>(LHS))) {
+ Mask[i] = 2*e; // Turn into undef.
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ } else {
+ Mask[i] = Mask[i] % e; // Force to LHS.
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
+ Mask[i]));
+ }
+ }
+ }
+ SVI.setOperand(0, SVI.getOperand(1));
+ SVI.setOperand(1, UndefValue::get(RHS->getType()));
+ SVI.setOperand(2, ConstantVector::get(Elts));
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
+ }
+
+ // Analyze the shuffle, are the LHS or RHS and identity shuffles?
+ bool isLHSID = true, isRHSID = true;
+
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] >= e*2) continue; // Ignore undef values.
+ // Is this an identity shuffle of the LHS value?
+ isLHSID &= (Mask[i] == i);
+
+ // Is this an identity shuffle of the RHS value?
+ isRHSID &= (Mask[i]-e == i);
+ }
+
+ // Eliminate identity shuffles.
+ if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
+ if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
+
+ // If the LHS is a shufflevector itself, see if we can combine it with this
+ // one without producing an unusual shuffle. Here we are really conservative:
+ // we are absolutely afraid of producing a shuffle mask not in the input
+ // program, because the code gen may not be smart enough to turn a merged
+ // shuffle into two specific shuffles: it may produce worse code. As such,
+ // we only merge two shuffles if the result is one of the two input shuffle
+ // masks. In this case, merging the shuffles just removes one instruction,
+ // which we know is safe. This is good for things like turning:
+ // (splat(splat)) -> splat.
+ if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
+ if (isa<UndefValue>(RHS)) {
+ std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
+
+ if (LHSMask.size() == Mask.size()) {
+ std::vector<unsigned> NewMask;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i)
+ if (Mask[i] >= e)
+ NewMask.push_back(2*e);
+ else
+ NewMask.push_back(LHSMask[Mask[i]]);
+
+ // If the result mask is equal to the src shuffle or this
+ // shuffle mask, do the replacement.
+ if (NewMask == LHSMask || NewMask == Mask) {
+ unsigned LHSInNElts =
+ cast<VectorType>(LHSSVI->getOperand(0)->getType())->
+ getNumElements();
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
+ if (NewMask[i] >= LHSInNElts*2) {
+ Elts.push_back(UndefValue::get(
+ Type::getInt32Ty(SVI.getContext())));
+ } else {
+ Elts.push_back(ConstantInt::get(
+ Type::getInt32Ty(SVI.getContext()),
+ NewMask[i]));
+ }
+ }
+ return new ShuffleVectorInst(LHSSVI->getOperand(0),
+ LHSSVI->getOperand(1),
+ ConstantVector::get(Elts));
+ }
+ }
+ }
+ }
+
+ return MadeChange ? &SVI : 0;
+}
+
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
new file mode 100644
index 0000000..9100a85
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -0,0 +1,105 @@
+//===- InstCombineWorklist.h - Worklist for the InstCombine pass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTCOMBINE_WORKLIST_H
+#define INSTCOMBINE_WORKLIST_H
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Instruction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// InstCombineWorklist - This is the worklist management logic for
+/// InstCombine.
+class LLVM_LIBRARY_VISIBILITY InstCombineWorklist {
+ SmallVector<Instruction*, 256> Worklist;
+ DenseMap<Instruction*, unsigned> WorklistMap;
+
+ void operator=(const InstCombineWorklist&RHS); // DO NOT IMPLEMENT
+ InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT
+public:
+ InstCombineWorklist() {}
+
+ bool isEmpty() const { return Worklist.empty(); }
+
+ /// Add - Add the specified instruction to the worklist if it isn't already
+ /// in it.
+ void Add(Instruction *I) {
+ if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
+ DEBUG(errs() << "IC: ADD: " << *I << '\n');
+ Worklist.push_back(I);
+ }
+ }
+
+ void AddValue(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ Add(I);
+ }
+
+ /// AddInitialGroup - Add the specified batch of stuff in reverse order.
+ /// which should only be done when the worklist is empty and when the group
+ /// has no duplicates.
+ void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
+ assert(Worklist.empty() && "Worklist must be empty to add initial group");
+ Worklist.reserve(NumEntries+16);
+ DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+ for (; NumEntries; --NumEntries) {
+ Instruction *I = List[NumEntries-1];
+ WorklistMap.insert(std::make_pair(I, Worklist.size()));
+ Worklist.push_back(I);
+ }
+ }
+
+ // Remove - remove I from the worklist if it exists.
+ void Remove(Instruction *I) {
+ DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
+ if (It == WorklistMap.end()) return; // Not in worklist.
+
+ // Don't bother moving everything down, just null out the slot.
+ Worklist[It->second] = 0;
+
+ WorklistMap.erase(It);
+ }
+
+ Instruction *RemoveOne() {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+ WorklistMap.erase(I);
+ return I;
+ }
+
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+ /// the instruction to the work lists because they might get more simplified
+ /// now.
+ ///
+ void AddUsersToWorkList(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI)
+ Add(cast<Instruction>(*UI));
+ }
+
+
+ /// Zap - check that the worklist is empty and nuke the backing store for
+ /// the map if it is large.
+ void Zap() {
+ assert(WorklistMap.empty() && "Worklist empty, but map not?");
+
+ // Do an explicit clear, this shrinks the map if needed.
+ WorklistMap.clear();
+ }
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
new file mode 100644
index 0000000..e46c679
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -0,0 +1,1304 @@
+//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InstructionCombining - Combine instructions to form fewer, simple
+// instructions. This pass does not modify the CFG. This pass is where
+// algebraic simplification happens.
+//
+// This pass combines things like:
+// %Y = add i32 %X, 1
+// %Z = add i32 %Y, 1
+// into:
+// %Z = add i32 %X, 2
+//
+// This is a simple worklist driven algorithm.
+//
+// This pass guarantees that the following canonicalizations are performed on
+// the program:
+// 1. If a binary operator has a constant operand, it is moved to the RHS
+// 2. Bitwise operators with constant operands are always grouped so that
+// shifts are performed first, then or's, then and's, then xor's.
+// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
+// 4. All cmp instructions on boolean values are replaced with logical ops
+// 5. add X, X is represented as (X*2) => (X << 1)
+// 6. Multiplies with a power-of-two constant argument are transformed into
+// shifts.
+// ... etc.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Scalar.h"
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+#include <climits>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+STATISTIC(NumCombined , "Number of insts combined");
+STATISTIC(NumConstProp, "Number of constant folds");
+STATISTIC(NumDeadInst , "Number of dead inst eliminated");
+STATISTIC(NumSunkInst , "Number of instructions sunk");
+
+
+char InstCombiner::ID = 0;
+INITIALIZE_PASS(InstCombiner, "instcombine",
+ "Combine redundant instructions", false, false);
+
+void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreservedID(LCSSAID);
+ AU.setPreservesCFG();
+}
+
+
+/// ShouldChangeType - Return true if it is desirable to convert a computation
+/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
+/// type for example, or from a smaller to a larger illegal type.
+bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
+ assert(From->isIntegerTy() && To->isIntegerTy());
+
+ // If we don't have TD, we don't know if the source/dest are legal.
+ if (!TD) return false;
+
+ unsigned FromWidth = From->getPrimitiveSizeInBits();
+ unsigned ToWidth = To->getPrimitiveSizeInBits();
+ bool FromLegal = TD->isLegalInteger(FromWidth);
+ bool ToLegal = TD->isLegalInteger(ToWidth);
+
+ // If this is a legal integer from type, and the result would be an illegal
+ // type, don't do the transformation.
+ if (FromLegal && !ToLegal)
+ return false;
+
+ // Otherwise, if both are illegal, do not increase the size of the result. We
+ // do allow things like i160 -> i64, but not i64 -> i160.
+ if (!FromLegal && !ToLegal && ToWidth > FromWidth)
+ return false;
+
+ return true;
+}
+
+
+// SimplifyCommutative - This performs a few simplifications for commutative
+// operators:
+//
+// 1. Order operands such that they are listed from right (least complex) to
+// left (most complex). This puts constants before unary operators before
+// binary operators.
+//
+// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2))
+// 3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
+//
+bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
+ bool Changed = false;
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1)))
+ Changed = !I.swapOperands();
+
+ if (!I.isAssociative()) return Changed;
+
+ Instruction::BinaryOps Opcode = I.getOpcode();
+ if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
+ if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
+ if (isa<Constant>(I.getOperand(1))) {
+ Constant *Folded = ConstantExpr::get(I.getOpcode(),
+ cast<Constant>(I.getOperand(1)),
+ cast<Constant>(Op->getOperand(1)));
+ I.setOperand(0, Op->getOperand(0));
+ I.setOperand(1, Folded);
+ return true;
+ }
+
+ if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1)))
+ if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) &&
+ Op->hasOneUse() && Op1->hasOneUse()) {
+ Constant *C1 = cast<Constant>(Op->getOperand(1));
+ Constant *C2 = cast<Constant>(Op1->getOperand(1));
+
+ // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
+ Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
+ Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
+ Op1->getOperand(0),
+ Op1->getName(), &I);
+ Worklist.Add(New);
+ I.setOperand(0, New);
+ I.setOperand(1, Folded);
+ return true;
+ }
+ }
+ return Changed;
+}
+
+// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
+// if the LHS is a constant zero (which is the 'negate' form).
+//
+Value *InstCombiner::dyn_castNegVal(Value *V) const {
+ if (BinaryOperator::isNeg(V))
+ return BinaryOperator::getNegArgument(V);
+
+ // Constants can be considered to be negated values if they can be folded.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantExpr::getNeg(C);
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (C->getType()->getElementType()->isIntegerTy())
+ return ConstantExpr::getNeg(C);
+
+ return 0;
+}
+
+// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
+// instruction if the LHS is a constant negative zero (which is the 'negate'
+// form).
+//
+Value *InstCombiner::dyn_castFNegVal(Value *V) const {
+ if (BinaryOperator::isFNeg(V))
+ return BinaryOperator::getFNegArgument(V);
+
+ // Constants can be considered to be negated values if they can be folded.
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V))
+ return ConstantExpr::getFNeg(C);
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (C->getType()->getElementType()->isFloatingPointTy())
+ return ConstantExpr::getFNeg(C);
+
+ return 0;
+}
+
+static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
+ InstCombiner *IC) {
+ if (CastInst *CI = dyn_cast<CastInst>(&I))
+ return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
+
+ // Figure out if the constant is the left or the right argument.
+ bool ConstIsRHS = isa<Constant>(I.getOperand(1));
+ Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
+
+ if (Constant *SOC = dyn_cast<Constant>(SO)) {
+ if (ConstIsRHS)
+ return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
+ return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
+ }
+
+ Value *Op0 = SO, *Op1 = ConstOperand;
+ if (!ConstIsRHS)
+ std::swap(Op0, Op1);
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
+ return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
+ SO->getName()+".op");
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
+ return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
+ return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ llvm_unreachable("Unknown binary instruction type!");
+}
+
+// FoldOpIntoSelect - Given an instruction with a select as one operand and a
+// constant as the other operand, try to fold the binary operator into the
+// select arguments. This also works for Cast instructions, which obviously do
+// not have a second operand.
+Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
+ // Don't modify shared select instructions
+ if (!SI->hasOneUse()) return 0;
+ Value *TV = SI->getOperand(1);
+ Value *FV = SI->getOperand(2);
+
+ if (isa<Constant>(TV) || isa<Constant>(FV)) {
+ // Bool selects with constant operands can be folded to logical ops.
+ if (SI->getType()->isIntegerTy(1)) return 0;
+
+ Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
+ Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
+
+ return SelectInst::Create(SI->getCondition(), SelectTrueVal,
+ SelectFalseVal);
+ }
+ return 0;
+}
+
+
+/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
+/// has a PHI node as operand #0, see if we can fold the instruction into the
+/// PHI (which is only possible if all operands to the PHI are constants).
+///
+/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
+/// that would normally be unprofitable because they strongly encourage jump
+/// threading.
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
+ bool AllowAggressive) {
+ AllowAggressive = false;
+ PHINode *PN = cast<PHINode>(I.getOperand(0));
+ unsigned NumPHIValues = PN->getNumIncomingValues();
+ if (NumPHIValues == 0 ||
+ // We normally only transform phis with a single use, unless we're trying
+ // hard to make jump threading happen.
+ (!PN->hasOneUse() && !AllowAggressive))
+ return 0;
+
+
+ // Check to see if all of the operands of the PHI are simple constants
+ // (constantint/constantfp/undef). If there is one non-constant value,
+ // remember the BB it is in. If there is more than one or if *it* is a PHI,
+ // bail out. We don't do arbitrary constant expressions here because moving
+ // their computation can be expensive without a cost model.
+ BasicBlock *NonConstBB = 0;
+ for (unsigned i = 0; i != NumPHIValues; ++i)
+ if (!isa<Constant>(PN->getIncomingValue(i)) ||
+ isa<ConstantExpr>(PN->getIncomingValue(i))) {
+ if (NonConstBB) return 0; // More than one non-const value.
+ if (isa<PHINode>(PN->getIncomingValue(i))) return 0; // Itself a phi.
+ NonConstBB = PN->getIncomingBlock(i);
+
+ // If the incoming non-constant value is in I's block, we have an infinite
+ // loop.
+ if (NonConstBB == I.getParent())
+ return 0;
+ }
+
+ // If there is exactly one non-constant value, we can insert a copy of the
+ // operation in that block. However, if this is a critical edge, we would be
+ // inserting the computation one some other paths (e.g. inside a loop). Only
+ // do this if the pred block is unconditionally branching into the phi block.
+ if (NonConstBB != 0 && !AllowAggressive) {
+ BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
+ if (!BI || !BI->isUnconditional()) return 0;
+ }
+
+ // Okay, we can do the transformation: create the new PHI node.
+ PHINode *NewPN = PHINode::Create(I.getType(), "");
+ NewPN->reserveOperandSpace(PN->getNumOperands()/2);
+ InsertNewInstBefore(NewPN, *PN);
+ NewPN->takeName(PN);
+
+ // Next, add all of the operands to the PHI.
+ if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
+ // We only currently try to fold the condition of a select when it is a phi,
+ // not the true/false values.
+ Value *TrueV = SI->getTrueValue();
+ Value *FalseV = SI->getFalseValue();
+ BasicBlock *PhiTransBB = PN->getParent();
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ BasicBlock *ThisBB = PN->getIncomingBlock(i);
+ Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
+ Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+ InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
+ } else {
+ assert(PN->getIncomingBlock(i) == NonConstBB);
+ InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
+ FalseVInPred,
+ "phitmp", NonConstBB->getTerminator());
+ Worklist.Add(cast<Instruction>(InV));
+ }
+ NewPN->addIncoming(InV, ThisBB);
+ }
+ } else if (I.getNumOperands() == 2) {
+ Constant *C = cast<Constant>(I.getOperand(1));
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(&I))
+ InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
+ else
+ InV = ConstantExpr::get(I.getOpcode(), InC, C);
+ } else {
+ assert(PN->getIncomingBlock(i) == NonConstBB);
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
+ InV = BinaryOperator::Create(BO->getOpcode(),
+ PN->getIncomingValue(i), C, "phitmp",
+ NonConstBB->getTerminator());
+ else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
+ InV = CmpInst::Create(CI->getOpcode(),
+ CI->getPredicate(),
+ PN->getIncomingValue(i), C, "phitmp",
+ NonConstBB->getTerminator());
+ else
+ llvm_unreachable("Unknown binop!");
+
+ Worklist.Add(cast<Instruction>(InV));
+ }
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ } else {
+ CastInst *CI = cast<CastInst>(&I);
+ const Type *RetTy = CI->getType();
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+ InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
+ } else {
+ assert(PN->getIncomingBlock(i) == NonConstBB);
+ InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
+ I.getType(), "phitmp",
+ NonConstBB->getTerminator());
+ Worklist.Add(cast<Instruction>(InV));
+ }
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ }
+ return ReplaceInstUsesWith(I, NewPN);
+}
+
+/// FindElementAtOffset - Given a type and a constant offset, determine whether
+/// or not there is a sequence of GEP indices into the type that will land us at
+/// the specified offset. If so, fill them into NewIndices and return the
+/// resultant element type, otherwise return null.
+const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices) {
+ if (!TD) return 0;
+ if (!Ty->isSized()) return 0;
+
+ // Start with the index over the outer type. Note that the type size
+ // might be zero (even if the offset isn't zero) if the indexed type
+ // is something like [0 x {int, int}]
+ const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ int64_t FirstIdx = 0;
+ if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
+ FirstIdx = Offset/TySize;
+ Offset -= FirstIdx*TySize;
+
+ // Handle hosts where % returns negative instead of values [0..TySize).
+ if (Offset < 0) {
+ --FirstIdx;
+ Offset += TySize;
+ assert(Offset >= 0);
+ }
+ assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
+ }
+
+ NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+
+ // Index into the types. If we fail, set OrigBase to null.
+ while (Offset) {
+ // Indexing into tail padding between struct/array elements.
+ if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
+ return 0;
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ assert(Offset < (int64_t)SL->getSizeInBytes() &&
+ "Offset must stay within the indexed type");
+
+ unsigned Elt = SL->getElementContainingOffset(Offset);
+ NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ Elt));
+
+ Offset -= SL->getElementOffset(Elt);
+ Ty = STy->getElementType(Elt);
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
+ assert(EltSize && "Cannot index into a zero-sized array");
+ NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
+ Offset %= EltSize;
+ Ty = AT->getElementType();
+ } else {
+ // Otherwise, we can't index into the middle of this atomic type, bail.
+ return 0;
+ }
+ }
+
+ return Ty;
+}
+
+
+
+Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
+
+ if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
+ return ReplaceInstUsesWith(GEP, V);
+
+ Value *PtrOp = GEP.getOperand(0);
+
+ if (isa<UndefValue>(GEP.getOperand(0)))
+ return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
+
+ // Eliminate unneeded casts for indices.
+ if (TD) {
+ bool MadeChange = false;
+ unsigned PtrSize = TD->getPointerSizeInBits();
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
+ I != E; ++I, ++GTI) {
+ if (!isa<SequentialType>(*GTI)) continue;
+
+ // If we are using a wider index than needed for this platform, shrink it
+ // to what we need. If narrower, sign-extend it to what we need. This
+ // explicit cast can make subsequent optimizations more obvious.
+ unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
+ if (OpBits == PtrSize)
+ continue;
+
+ *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
+ MadeChange = true;
+ }
+ if (MadeChange) return &GEP;
+ }
+
+ // Combine Indices - If the source pointer to this getelementptr instruction
+ // is a getelementptr instruction, combine the indices of the two
+ // getelementptr instructions into a single instruction.
+ //
+ if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
+ // Note that if our source is a gep chain itself that we wait for that
+ // chain to be resolved before we perform this transformation. This
+ // avoids us creating a TON of code in some cases.
+ //
+ if (GetElementPtrInst *SrcGEP =
+ dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
+ if (SrcGEP->getNumOperands() == 2)
+ return 0; // Wait until our source is folded to completion.
+
+ SmallVector<Value*, 8> Indices;
+
+ // Find out whether the last index in the source GEP is a sequential idx.
+ bool EndsWithSequential = false;
+ for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
+ I != E; ++I)
+ EndsWithSequential = !(*I)->isStructTy();
+
+ // Can we combine the two pointer arithmetics offsets?
+ if (EndsWithSequential) {
+ // Replace: gep (gep %P, long B), long A, ...
+ // With: T = long A+B; gep %P, T, ...
+ //
+ Value *Sum;
+ Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
+ Value *GO1 = GEP.getOperand(1);
+ if (SO1 == Constant::getNullValue(SO1->getType())) {
+ Sum = GO1;
+ } else if (GO1 == Constant::getNullValue(GO1->getType())) {
+ Sum = SO1;
+ } else {
+ // If they aren't the same type, then the input hasn't been processed
+ // by the loop above yet (which canonicalizes sequential index types to
+ // intptr_t). Just avoid transforming this until the input has been
+ // normalized.
+ if (SO1->getType() != GO1->getType())
+ return 0;
+ Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
+ }
+
+ // Update the GEP in place if possible.
+ if (Src->getNumOperands() == 2) {
+ GEP.setOperand(0, Src->getOperand(0));
+ GEP.setOperand(1, Sum);
+ return &GEP;
+ }
+ Indices.append(Src->op_begin()+1, Src->op_end()-1);
+ Indices.push_back(Sum);
+ Indices.append(GEP.op_begin()+2, GEP.op_end());
+ } else if (isa<Constant>(*GEP.idx_begin()) &&
+ cast<Constant>(*GEP.idx_begin())->isNullValue() &&
+ Src->getNumOperands() != 1) {
+ // Otherwise we can do the fold if the first index of the GEP is a zero
+ Indices.append(Src->op_begin()+1, Src->op_end());
+ Indices.append(GEP.idx_begin()+1, GEP.idx_end());
+ }
+
+ if (!Indices.empty())
+ return (GEP.isInBounds() && Src->isInBounds()) ?
+ GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
+ Indices.end(), GEP.getName()) :
+ GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
+ Indices.end(), GEP.getName());
+ }
+
+ // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
+ Value *StrippedPtr = PtrOp->stripPointerCasts();
+ if (StrippedPtr != PtrOp) {
+ const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType());
+
+ bool HasZeroPointerIndex = false;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
+ HasZeroPointerIndex = C->isZero();
+
+ // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+ // into : GEP [10 x i8]* X, i32 0, ...
+ //
+ // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
+ // into : GEP i8* X, ...
+ //
+ // This occurs when the program declares an array extern like "int X[];"
+ if (HasZeroPointerIndex) {
+ const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
+ if (const ArrayType *CATy =
+ dyn_cast<ArrayType>(CPTy->getElementType())) {
+ // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
+ // -> GEP i8* X, ...
+ SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
+ GetElementPtrInst *Res =
+ GetElementPtrInst::Create(StrippedPtr, Idx.begin(),
+ Idx.end(), GEP.getName());
+ Res->setIsInBounds(GEP.isInBounds());
+ return Res;
+ }
+
+ if (const ArrayType *XATy =
+ dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){
+ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == XATy->getElementType()) {
+ // -> GEP [10 x i8]* X, i32 0, ...
+ // At this point, we know that the cast source type is a pointer
+ // to an array of the same type as the destination pointer
+ // array. Because the array type is never stepped over (there
+ // is a leading zero) we can fold the cast into this GEP.
+ GEP.setOperand(0, StrippedPtr);
+ return &GEP;
+ }
+ }
+ }
+ } else if (GEP.getNumOperands() == 2) {
+ // Transform things like:
+ // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
+ // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
+ const Type *SrcElTy = StrippedPtrTy->getElementType();
+ const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
+ if (TD && SrcElTy->isArrayTy() &&
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+ TD->getTypeAllocSize(ResElTy)) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+ Idx[1] = GEP.getOperand(1);
+ Value *NewGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+ // V and GEP are both pointer types --> BitCast
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+
+ // Transform things like:
+ // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
+ // (where tmp = 8*tmp2) into:
+ // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
+
+ if (TD && SrcElTy->isArrayTy() && ResElTy->isIntegerTy(8)) {
+ uint64_t ArrayEltSize =
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+
+ // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We
+ // allow either a mul, shift, or constant here.
+ Value *NewIdx = 0;
+ ConstantInt *Scale = 0;
+ if (ArrayEltSize == 1) {
+ NewIdx = GEP.getOperand(1);
+ Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
+ } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
+ NewIdx = ConstantInt::get(CI->getType(), 1);
+ Scale = CI;
+ } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
+ if (Inst->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
+ uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
+ Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
+ 1ULL << ShAmtVal);
+ NewIdx = Inst->getOperand(0);
+ } else if (Inst->getOpcode() == Instruction::Mul &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ Scale = cast<ConstantInt>(Inst->getOperand(1));
+ NewIdx = Inst->getOperand(0);
+ }
+ }
+
+ // If the index will be to exactly the right offset with the scale taken
+ // out, perform the transformation. Note, we don't know whether Scale is
+ // signed or not. We'll use unsigned version of division/modulo
+ // operation after making sure Scale doesn't have the sign bit set.
+ if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
+ Scale->getZExtValue() % ArrayEltSize == 0) {
+ Scale = ConstantInt::get(Scale->getType(),
+ Scale->getZExtValue() / ArrayEltSize);
+ if (Scale->getZExtValue() != 1) {
+ Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
+ false /*ZExt*/);
+ NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
+ }
+
+ // Insert the new GEP instruction.
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+ Idx[1] = NewIdx;
+ Value *NewGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2,GEP.getName()):
+ Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+ // The NewGEP must be pointer typed, so must the old one -> BitCast
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+ }
+ }
+ }
+
+ /// See if we can simplify:
+ /// X = bitcast A* to B*
+ /// Y = gep X, <...constant indices...>
+ /// into a gep of the original struct. This is important for SROA and alias
+ /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
+ if (TD &&
+ !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
+ // Determine how much the GEP moves the pointer. We are guaranteed to get
+ // a constant back from EmitGEPOffset.
+ ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP));
+ int64_t Offset = OffsetV->getSExtValue();
+
+ // If this GEP instruction doesn't move the pointer, just replace the GEP
+ // with a bitcast of the real input to the dest type.
+ if (Offset == 0) {
+ // If the bitcast is of an allocation, and the allocation will be
+ // converted to match the type of the cast, don't touch this.
+ if (isa<AllocaInst>(BCI->getOperand(0)) ||
+ isMalloc(BCI->getOperand(0))) {
+ // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+ if (Instruction *I = visitBitCast(*BCI)) {
+ if (I != BCI) {
+ I->takeName(BCI);
+ BCI->getParent()->getInstList().insert(BCI, I);
+ ReplaceInstUsesWith(*BCI, I);
+ }
+ return &GEP;
+ }
+ }
+ return new BitCastInst(BCI->getOperand(0), GEP.getType());
+ }
+
+ // Otherwise, if the offset is non-zero, we need to find out if there is a
+ // field at Offset in 'A's type. If so, we can pull the cast through the
+ // GEP.
+ SmallVector<Value*, 8> NewIndices;
+ const Type *InTy =
+ cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
+ if (FindElementAtOffset(InTy, Offset, NewIndices)) {
+ Value *NGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
+ NewIndices.end()) :
+ Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
+ NewIndices.end());
+
+ if (NGEP->getType() == GEP.getType())
+ return ReplaceInstUsesWith(GEP, NGEP);
+ NGEP->takeName(&GEP);
+ return new BitCastInst(NGEP, GEP.getType());
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+
+static bool IsOnlyNullComparedAndFreed(const Value &V) {
+ for (Value::const_use_iterator UI = V.use_begin(), UE = V.use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (isFreeCall(U))
+ continue;
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(U))
+ if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1)))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+Instruction *InstCombiner::visitMalloc(Instruction &MI) {
+ // If we have a malloc call which is only used in any amount of comparisons
+ // to null and free calls, delete the calls and replace the comparisons with
+ // true or false as appropriate.
+ if (IsOnlyNullComparedAndFreed(MI)) {
+ for (Value::use_iterator UI = MI.use_begin(), UE = MI.use_end();
+ UI != UE;) {
+ // We can assume that every remaining use is a free call or an icmp eq/ne
+ // to null, so the cast is safe.
+ Instruction *I = cast<Instruction>(*UI);
+
+ // Early increment here, as we're about to get rid of the user.
+ ++UI;
+
+ if (isFreeCall(I)) {
+ EraseInstFromFunction(*cast<CallInst>(I));
+ continue;
+ }
+ // Again, the cast is safe.
+ ICmpInst *C = cast<ICmpInst>(I);
+ ReplaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()),
+ C->isFalseWhenEqual()));
+ EraseInstFromFunction(*C);
+ }
+ return EraseInstFromFunction(MI);
+ }
+ return 0;
+}
+
+
+
+Instruction *InstCombiner::visitFree(CallInst &FI) {
+ Value *Op = FI.getArgOperand(0);
+
+ // free undef -> unreachable.
+ if (isa<UndefValue>(Op)) {
+ // Insert a new store to null because we cannot modify the CFG here.
+ new StoreInst(ConstantInt::getTrue(FI.getContext()),
+ UndefValue::get(Type::getInt1PtrTy(FI.getContext())), &FI);
+ return EraseInstFromFunction(FI);
+ }
+
+ // If we have 'free null' delete the instruction. This can happen in stl code
+ // when lots of inlining happens.
+ if (isa<ConstantPointerNull>(Op))
+ return EraseInstFromFunction(FI);
+
+ return 0;
+}
+
+
+
+Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
+ // Change br (not X), label True, label False to: br X, label False, True
+ Value *X = 0;
+ BasicBlock *TrueDest;
+ BasicBlock *FalseDest;
+ if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
+ !isa<Constant>(X)) {
+ // Swap Destinations and condition...
+ BI.setCondition(X);
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ return &BI;
+ }
+
+ // Cannonicalize fcmp_one -> fcmp_oeq
+ FCmpInst::Predicate FPred; Value *Y;
+ if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
+ FPred == FCmpInst::FCMP_OGE) {
+ FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
+ Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
+
+ // Swap Destinations and condition.
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ Worklist.Add(Cond);
+ return &BI;
+ }
+
+ // Cannonicalize icmp_ne -> icmp_eq
+ ICmpInst::Predicate IPred;
+ if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE ||
+ IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
+ IPred == ICmpInst::ICMP_SGE) {
+ ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
+ Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
+ // Swap Destinations and condition.
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ Worklist.Add(Cond);
+ return &BI;
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
+ Value *Cond = SI.getCondition();
+ if (Instruction *I = dyn_cast<Instruction>(Cond)) {
+ if (I->getOpcode() == Instruction::Add)
+ if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // change 'switch (X+4) case 1:' into 'switch (X) case -3'
+ for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
+ SI.setOperand(i,
+ ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
+ AddRHS));
+ SI.setOperand(0, I->getOperand(0));
+ Worklist.Add(I);
+ return &SI;
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
+ Value *Agg = EV.getAggregateOperand();
+
+ if (!EV.hasIndices())
+ return ReplaceInstUsesWith(EV, Agg);
+
+ if (Constant *C = dyn_cast<Constant>(Agg)) {
+ if (isa<UndefValue>(C))
+ return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
+
+ if (isa<ConstantAggregateZero>(C))
+ return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
+
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+ // Extract the element indexed by the first index out of the constant
+ Value *V = C->getOperand(*EV.idx_begin());
+ if (EV.getNumIndices() > 1)
+ // Extract the remaining indices out of the constant indexed by the
+ // first index
+ return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
+ else
+ return ReplaceInstUsesWith(EV, V);
+ }
+ return 0; // Can't handle other constants
+ }
+ if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
+ // We're extracting from an insertvalue instruction, compare the indices
+ const unsigned *exti, *exte, *insi, *inse;
+ for (exti = EV.idx_begin(), insi = IV->idx_begin(),
+ exte = EV.idx_end(), inse = IV->idx_end();
+ exti != exte && insi != inse;
+ ++exti, ++insi) {
+ if (*insi != *exti)
+ // The insert and extract both reference distinctly different elements.
+ // This means the extract is not influenced by the insert, and we can
+ // replace the aggregate operand of the extract with the aggregate
+ // operand of the insert. i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 0
+ // with
+ // %E = extractvalue { i32, { i32 } } %A, 0
+ return ExtractValueInst::Create(IV->getAggregateOperand(),
+ EV.idx_begin(), EV.idx_end());
+ }
+ if (exti == exte && insi == inse)
+ // Both iterators are at the end: Index lists are identical. Replace
+ // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %C = extractvalue { i32, { i32 } } %B, 1, 0
+ // with "i32 42"
+ return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
+ if (exti == exte) {
+ // The extract list is a prefix of the insert list. i.e. replace
+ // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %E = extractvalue { i32, { i32 } } %I, 1
+ // with
+ // %X = extractvalue { i32, { i32 } } %A, 1
+ // %E = insertvalue { i32 } %X, i32 42, 0
+ // by switching the order of the insert and extract (though the
+ // insertvalue should be left in, since it may have other uses).
+ Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
+ EV.idx_begin(), EV.idx_end());
+ return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
+ insi, inse);
+ }
+ if (insi == inse)
+ // The insert list is a prefix of the extract list
+ // We can simply remove the common indices from the extract and make it
+ // operate on the inserted value instead of the insertvalue result.
+ // i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 1, 0
+ // with
+ // %E extractvalue { i32 } { i32 42 }, 0
+ return ExtractValueInst::Create(IV->getInsertedValueOperand(),
+ exti, exte);
+ }
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
+ // We're extracting from an intrinsic, see if we're the only user, which
+ // allows us to simplify multiple result intrinsics to simpler things that
+ // just get one value.
+ if (II->hasOneUse()) {
+ // Check if we're grabbing the overflow bit or the result of a 'with
+ // overflow' intrinsic. If it's the latter we can remove the intrinsic
+ // and replace it with a traditional binary instruction.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateAdd(LHS, RHS);
+ }
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateSub(LHS, RHS);
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateMul(LHS, RHS);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ // Can't simplify extracts from other values. Note that nested extracts are
+ // already simplified implicitely by the above (extract ( extract (insert) )
+ // will be translated into extract ( insert ( extract ) ) first and then just
+ // the value inserted, if appropriate).
+ return 0;
+}
+
+
+
+
+/// TryToSinkInstruction - Try to move the specified instruction from its
+/// current block into the beginning of DestBlock, which can only happen if it's
+/// safe to move the instruction past all of the instructions between it and the
+/// end of its block.
+static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+ assert(I->hasOneUse() && "Invariants didn't hold!");
+
+ // Cannot move control-flow-involving, volatile loads, vaarg, etc.
+ if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
+ return false;
+
+ // Do not sink alloca instructions out of the entry block.
+ if (isa<AllocaInst>(I) && I->getParent() ==
+ &DestBlock->getParent()->getEntryBlock())
+ return false;
+
+ // We can only sink load instructions if there is nothing between the load and
+ // the end of block that could change the value.
+ if (I->mayReadFromMemory()) {
+ for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
+ Scan != E; ++Scan)
+ if (Scan->mayWriteToMemory())
+ return false;
+ }
+
+ BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
+
+ I->moveBefore(InsertPos);
+ ++NumSunkInst;
+ return true;
+}
+
+
+/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
+/// all reachable code to the worklist.
+///
+/// This has a couple of tricks to make the code faster and more powerful. In
+/// particular, we constant fold and DCE instructions as we go, to avoid adding
+/// them to the worklist (this significantly speeds up instcombine on code where
+/// many instructions are dead or constant). Additionally, if we find a branch
+/// whose condition is a known constant, we only visit the reachable successors.
+///
+static bool AddReachableCodeToWorklist(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 64> &Visited,
+ InstCombiner &IC,
+ const TargetData *TD) {
+ bool MadeIRChange = false;
+ SmallVector<BasicBlock*, 256> Worklist;
+ Worklist.push_back(BB);
+
+ std::vector<Instruction*> InstrsForInstCombineWorklist;
+ InstrsForInstCombineWorklist.reserve(128);
+
+ SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
+
+ do {
+ BB = Worklist.pop_back_val();
+
+ // We have now visited this block! If we've already been here, ignore it.
+ if (!Visited.insert(BB)) continue;
+
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *Inst = BBI++;
+
+ // DCE instruction if trivially dead.
+ if (isInstructionTriviallyDead(Inst)) {
+ ++NumDeadInst;
+ DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // ConstantProp instruction if trivially constant.
+ if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+ << *Inst << '\n');
+ Inst->replaceAllUsesWith(C);
+ ++NumConstProp;
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ if (TD) {
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
+ i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+ if (CE == 0) continue;
+
+ // If we already folded this constant, don't try again.
+ if (!FoldedConstants.insert(CE))
+ continue;
+
+ Constant *NewC = ConstantFoldConstantExpression(CE, TD);
+ if (NewC && NewC != CE) {
+ *i = NewC;
+ MadeIRChange = true;
+ }
+ }
+ }
+
+ InstrsForInstCombineWorklist.push_back(Inst);
+ }
+
+ // Recursively visit successors. If this is a branch or switch on a
+ // constant, only visit the reachable successor.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
+ bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
+ BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
+ // See if this is an explicit destination.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
+ if (SI->getCaseValue(i) == Cond) {
+ BasicBlock *ReachableBB = SI->getSuccessor(i);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+
+ // Otherwise it is the default destination.
+ Worklist.push_back(SI->getSuccessor(0));
+ continue;
+ }
+ }
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ Worklist.push_back(TI->getSuccessor(i));
+ } while (!Worklist.empty());
+
+ // Once we've found all of the instructions to add to instcombine's worklist,
+ // add them in reverse order. This way instcombine will visit from the top
+ // of the function down. This jives well with the way that it adds all uses
+ // of instructions to the worklist after doing a transformation, thus avoiding
+ // some N^2 behavior in pathological cases.
+ IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
+ InstrsForInstCombineWorklist.size());
+
+ return MadeIRChange;
+}
+
+bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
+ MadeIRChange = false;
+
+ DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ << F.getNameStr() << "\n");
+
+ {
+ // Do a depth-first traversal of the function, populate the worklist with
+ // the reachable instructions. Ignore blocks that are not reachable. Keep
+ // track of which blocks we visit.
+ SmallPtrSet<BasicBlock*, 64> Visited;
+ MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+
+ // Do a quick scan over the function. If we find any blocks that are
+ // unreachable, remove any instructions inside of them. This prevents
+ // the instcombine code from having to deal with some bad special cases.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (!Visited.count(BB)) {
+ Instruction *Term = BB->getTerminator();
+ while (Term != BB->begin()) { // Remove instrs bottom-up
+ BasicBlock::iterator I = Term; --I;
+
+ DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ // A debug intrinsic shouldn't force another iteration if we weren't
+ // going to do one without it.
+ if (!isa<DbgInfoIntrinsic>(I)) {
+ ++NumDeadInst;
+ MadeIRChange = true;
+ }
+
+ // If I is not void type then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust itself.
+ if (!I->getType()->isVoidTy())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ }
+ }
+ }
+
+ while (!Worklist.isEmpty()) {
+ Instruction *I = Worklist.RemoveOne();
+ if (I == 0) continue; // skip null values.
+
+ // Check to see if we can DCE the instruction.
+ if (isInstructionTriviallyDead(I)) {
+ DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ EraseInstFromFunction(*I);
+ ++NumDeadInst;
+ MadeIRChange = true;
+ continue;
+ }
+
+ // Instruction isn't dead, see if we can constant propagate it.
+ if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(I, TD)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+
+ // Add operands to the worklist.
+ ReplaceInstUsesWith(*I, C);
+ ++NumConstProp;
+ EraseInstFromFunction(*I);
+ MadeIRChange = true;
+ continue;
+ }
+
+ // See if we can trivially sink this instruction to a successor basic block.
+ if (I->hasOneUse()) {
+ BasicBlock *BB = I->getParent();
+ Instruction *UserInst = cast<Instruction>(I->use_back());
+ BasicBlock *UserParent;
+
+ // Get the block the use occurs in.
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+ UserParent = PN->getIncomingBlock(I->use_begin().getUse());
+ else
+ UserParent = UserInst->getParent();
+
+ if (UserParent != BB) {
+ bool UserIsSuccessor = false;
+ // See if the user is one of our successors.
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ if (*SI == UserParent) {
+ UserIsSuccessor = true;
+ break;
+ }
+
+ // If the user is one of our immediate successors, and if that successor
+ // only has us as a predecessors (we'd have to split the critical edge
+ // otherwise), we can keep going.
+ if (UserIsSuccessor && UserParent->getSinglePredecessor())
+ // Okay, the CFG is simple enough, try to sink this instruction.
+ MadeIRChange |= TryToSinkInstruction(I, UserParent);
+ }
+ }
+
+ // Now that we have an instruction, try combining it to simplify it.
+ Builder->SetInsertPoint(I->getParent(), I);
+
+#ifndef NDEBUG
+ std::string OrigI;
+#endif
+ DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
+ DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+
+ if (Instruction *Result = visit(*I)) {
+ ++NumCombined;
+ // Should we replace the old instruction with a new one?
+ if (Result != I) {
+ DEBUG(errs() << "IC: Old = " << *I << '\n'
+ << " New = " << *Result << '\n');
+
+ // Everything uses the new instruction now.
+ I->replaceAllUsesWith(Result);
+
+ // Push the new instruction and any users onto the worklist.
+ Worklist.Add(Result);
+ Worklist.AddUsersToWorkList(*Result);
+
+ // Move the name to the new instruction first.
+ Result->takeName(I);
+
+ // Insert the new instruction into the basic block...
+ BasicBlock *InstParent = I->getParent();
+ BasicBlock::iterator InsertPos = I;
+
+ if (!isa<PHINode>(Result)) // If combining a PHI, don't insert
+ while (isa<PHINode>(InsertPos)) // middle of a block of PHIs.
+ ++InsertPos;
+
+ InstParent->getInstList().insert(InsertPos, Result);
+
+ EraseInstFromFunction(*I);
+ } else {
+#ifndef NDEBUG
+ DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
+ << " New = " << *I << '\n');
+#endif
+
+ // If the instruction was modified, it's possible that it is now dead.
+ // if so, remove it.
+ if (isInstructionTriviallyDead(I)) {
+ EraseInstFromFunction(*I);
+ } else {
+ Worklist.Add(I);
+ Worklist.AddUsersToWorkList(*I);
+ }
+ }
+ MadeIRChange = true;
+ }
+ }
+
+ Worklist.Zap();
+ return MadeIRChange;
+}
+
+
+bool InstCombiner::runOnFunction(Function &F) {
+ MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+ TD = getAnalysisIfAvailable<TargetData>();
+
+
+ /// Builder - This is an IRBuilder that automatically inserts new
+ /// instructions into the worklist when they are created.
+ IRBuilder<true, TargetFolder, InstCombineIRInserter>
+ TheBuilder(F.getContext(), TargetFolder(TD),
+ InstCombineIRInserter(Worklist));
+ Builder = &TheBuilder;
+
+ bool EverMadeChange = false;
+
+ // Iterate while there is work to do.
+ unsigned Iteration = 0;
+ while (DoOneIteration(F, Iteration++))
+ EverMadeChange = true;
+
+ Builder = 0;
+ return EverMadeChange;
+}
+
+FunctionPass *llvm::createInstructionCombiningPass() {
+ return new InstCombiner();
+}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/Makefile b/contrib/llvm/lib/Transforms/InstCombine/Makefile
new file mode 100644
index 0000000..0c488e78
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/InstCombine/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/InstCombine/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMInstCombine
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/contrib/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
new file mode 100644
index 0000000..128bf48
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_library(LLVMInstrumentation
+ EdgeProfiling.cpp
+ OptimalEdgeProfiling.cpp
+ ProfilingUtils.cpp
+ )
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
new file mode 100644
index 0000000..a77d70c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -0,0 +1,114 @@
+//===- EdgeProfiling.cpp - Insert counters for edge profiling -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments the specified program with counters for edge profiling.
+// Edge profiling can give a reasonable approximation of the hot paths through a
+// program, and is used for a wide variety of program transformations.
+//
+// Note that this implementation is very naive. We insert a counter for *every*
+// edge in the program, instead of using control flow information to prune the
+// number of counters inserted.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-edge-profiling"
+#include "ProfilingUtils.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumEdgesInserted, "The # of edges inserted.");
+
+namespace {
+ class EdgeProfiler : public ModulePass {
+ bool runOnModule(Module &M);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ EdgeProfiler() : ModulePass(ID) {}
+
+ virtual const char *getPassName() const {
+ return "Edge Profiler";
+ }
+ };
+}
+
+char EdgeProfiler::ID = 0;
+INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling",
+ "Insert instrumentation for edge profiling", false, false);
+
+ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
+
+bool EdgeProfiler::runOnModule(Module &M) {
+ Function *Main = M.getFunction("main");
+ if (Main == 0) {
+ errs() << "WARNING: cannot insert edge profiling into a module"
+ << " with no main function!\n";
+ return false; // No main, no instrumentation!
+ }
+
+ std::set<BasicBlock*> BlocksToInstrument;
+ unsigned NumEdges = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Reserve space for (0,entry) edge.
+ ++NumEdges;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Keep track of which blocks need to be instrumented. We don't want to
+ // instrument blocks that are added as the result of breaking critical
+ // edges!
+ BlocksToInstrument.insert(BB);
+ NumEdges += BB->getTerminator()->getNumSuccessors();
+ }
+ }
+
+ const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges);
+ GlobalVariable *Counters =
+ new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "EdgeProfCounters");
+ NumEdgesInserted = NumEdges;
+
+ // Instrument all of the edges...
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Create counter for (0,entry) edge.
+ IncrementCounterInBlock(&F->getEntryBlock(), i++, Counters);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (BlocksToInstrument.count(BB)) { // Don't instrument inserted blocks
+ // Okay, we have to add a counter of each outgoing edge. If the
+ // outgoing edge is not critical don't split it, just insert the counter
+ // in the source or destination of the edge.
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ // If the edge is critical, split it.
+ SplitCriticalEdge(TI, s, this);
+
+ // Okay, we are guaranteed that the edge is no longer critical. If we
+ // only have a single successor, insert the counter in this block,
+ // otherwise insert it in the successor block.
+ if (TI->getNumSuccessors() == 1) {
+ // Insert counter at the start of the block
+ IncrementCounterInBlock(BB, i++, Counters);
+ } else {
+ // Insert counter at the start of the block
+ IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters);
+ }
+ }
+ }
+ }
+
+ // Add the initialization call to main.
+ InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters);
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Makefile b/contrib/llvm/lib/Transforms/Instrumentation/Makefile
new file mode 100644
index 0000000..6cbc7a9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/Instrumentation/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMInstrumentation
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
new file mode 100644
index 0000000..829da6b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -0,0 +1,108 @@
+//===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This module privides means for calculating a maximum spanning tree for a
+// given set of weighted edges. The type parameter T is the type of a node.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include <vector>
+#include <algorithm>
+
+namespace llvm {
+
+ /// MaximumSpanningTree - A MST implementation.
+ /// The type parameter T determines the type of the nodes of the graph.
+ template <typename T>
+ class MaximumSpanningTree {
+
+ // A comparing class for comparing weighted edges.
+ template <typename CT>
+ struct EdgeWeightCompare {
+ bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X,
+ typename MaximumSpanningTree<CT>::EdgeWeight Y) const {
+ if (X.second > Y.second) return true;
+ if (X.second < Y.second) return false;
+ if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) {
+ if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) {
+ if (BBX->size() > BBY->size()) return true;
+ if (BBX->size() < BBY->size()) return false;
+ }
+ }
+ if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) {
+ if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) {
+ if (BBX->size() > BBY->size()) return true;
+ if (BBX->size() < BBY->size()) return false;
+ }
+ }
+ return false;
+ }
+ };
+
+ public:
+ typedef std::pair<const T*, const T*> Edge;
+ typedef std::pair<Edge, double> EdgeWeight;
+ typedef std::vector<EdgeWeight> EdgeWeights;
+ protected:
+ typedef std::vector<Edge> MaxSpanTree;
+
+ MaxSpanTree MST;
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+
+ /// MaximumSpanningTree() - Takes a vector of weighted edges and returns a
+ /// spanning tree.
+ MaximumSpanningTree(EdgeWeights &EdgeVector) {
+
+ std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>());
+
+ // Create spanning tree, Forest contains a special data structure
+ // that makes checking if two nodes are already in a common (sub-)tree
+ // fast and cheap.
+ EquivalenceClasses<const T*> Forest;
+ for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+ EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+ Edge e = (*EWi).first;
+
+ Forest.insert(e.first);
+ Forest.insert(e.second);
+ }
+
+ // Iterate over the sorted edges, biggest first.
+ for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+ EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+ Edge e = (*EWi).first;
+
+ if (Forest.findLeader(e.first) != Forest.findLeader(e.second)) {
+ Forest.unionSets(e.first, e.second);
+ // So we know now that the edge is not already in a subtree, so we push
+ // the edge to the MST.
+ MST.push_back(e);
+ }
+ }
+ }
+
+ typename MaxSpanTree::iterator begin() {
+ return MST.begin();
+ }
+
+ typename MaxSpanTree::iterator end() {
+ return MST.end();
+ }
+ };
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
new file mode 100644
index 0000000..8eec987
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -0,0 +1,218 @@
+//===- OptimalEdgeProfiling.cpp - Insert counters for opt. edge profiling -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments the specified program with counters for edge profiling.
+// Edge profiling can give a reasonable approximation of the hot paths through a
+// program, and is used for a wide variety of program transformations.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-optimal-edge-profiling"
+#include "ProfilingUtils.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "MaximumSpanningTree.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumEdgesInserted, "The # of edges inserted.");
+
+namespace {
+ class OptimalEdgeProfiler : public ModulePass {
+ bool runOnModule(Module &M);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ OptimalEdgeProfiler() : ModulePass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(ProfileEstimatorPassID);
+ AU.addRequired<ProfileInfo>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Optimal Edge Profiler";
+ }
+ };
+}
+
+char OptimalEdgeProfiler::ID = 0;
+INITIALIZE_PASS(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+ "Insert optimal instrumentation for edge profiling",
+ false, false);
+
+ModulePass *llvm::createOptimalEdgeProfilerPass() {
+ return new OptimalEdgeProfiler();
+}
+
+inline static void printEdgeCounter(ProfileInfo::Edge e,
+ BasicBlock* b,
+ unsigned i) {
+ DEBUG(dbgs() << "--Edge Counter for " << (e) << " in " \
+ << ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n");
+}
+
+bool OptimalEdgeProfiler::runOnModule(Module &M) {
+ Function *Main = M.getFunction("main");
+ if (Main == 0) {
+ errs() << "WARNING: cannot insert edge profiling into a module"
+ << " with no main function!\n";
+ return false; // No main, no instrumentation!
+ }
+
+ // NumEdges counts all the edges that may be instrumented. Later on its
+ // decided which edges to actually instrument, to achieve optimal profiling.
+ // For the entry block a virtual edge (0,entry) is reserved, for each block
+ // with no successors an edge (BB,0) is reserved. These edges are necessary
+ // to calculate a truly optimal maximum spanning tree and thus an optimal
+ // instrumentation.
+ unsigned NumEdges = 0;
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ // Reserve space for (0,entry) edge.
+ ++NumEdges;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Keep track of which blocks need to be instrumented. We don't want to
+ // instrument blocks that are added as the result of breaking critical
+ // edges!
+ if (BB->getTerminator()->getNumSuccessors() == 0) {
+ // Reserve space for (BB,0) edge.
+ ++NumEdges;
+ } else {
+ NumEdges += BB->getTerminator()->getNumSuccessors();
+ }
+ }
+ }
+
+ // In the profiling output a counter for each edge is reserved, but only few
+ // are used. This is done to be able to read back in the profile without
+ // calulating the maximum spanning tree again, instead each edge counter that
+ // is not used is initialised with -1 to signal that this edge counter has to
+ // be calculated from other edge counters on reading the profile info back
+ // in.
+
+ const Type *Int32 = Type::getInt32Ty(M.getContext());
+ const ArrayType *ATy = ArrayType::get(Int32, NumEdges);
+ GlobalVariable *Counters =
+ new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(ATy), "OptEdgeProfCounters");
+ NumEdgesInserted = 0;
+
+ std::vector<Constant*> Initializer(NumEdges);
+ Constant* Zero = ConstantInt::get(Int32, 0);
+ Constant* Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);
+
+ // Instrument all of the edges not in MST...
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+
+ // Calculate a Maximum Spanning Tree with the edge weights determined by
+ // ProfileEstimator. ProfileEstimator also assign weights to the virtual
+ // edges (0,entry) and (BB,0) (for blocks with no successors) and this
+ // edges also participate in the maximum spanning tree calculation.
+ // The third parameter of MaximumSpanningTree() has the effect that not the
+ // actual MST is returned but the edges _not_ in the MST.
+
+ ProfileInfo::EdgeWeights ECs =
+ getAnalysis<ProfileInfo>(*F).getEdgeWeights(F);
+ std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
+ MaximumSpanningTree<BasicBlock> MST (EdgeVector);
+ std::stable_sort(MST.begin(),MST.end());
+
+ // Check if (0,entry) not in the MST. If not, instrument edge
+ // (IncrementCounterInBlock()) and set the counter initially to zero, if
+ // the edge is in the MST the counter is initialised to -1.
+
+ BasicBlock *entry = &(F->getEntryBlock());
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+ printEdgeCounter(edge,entry,i);
+ IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted;
+ Initializer[i++] = (Zero);
+ } else{
+ Initializer[i++] = (Uncounted);
+ }
+
+ // InsertedBlocks contains all blocks that were inserted for splitting an
+ // edge, this blocks do not have to be instrumented.
+ DenseSet<BasicBlock*> InsertedBlocks;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ // Check if block was not inserted and thus does not have to be
+ // instrumented.
+ if (InsertedBlocks.count(BB)) continue;
+
+ // Okay, we have to add a counter of each outgoing edge not in MST. If
+ // the outgoing edge is not critical don't split it, just insert the
+ // counter in the source or destination of the edge. Also, if the block
+ // has no successors, the virtual edge (BB,0) is processed.
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+ printEdgeCounter(edge,BB,i);
+ IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
+ Initializer[i++] = (Zero);
+ } else{
+ Initializer[i++] = (Uncounted);
+ }
+ }
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+ BasicBlock *Succ = TI->getSuccessor(s);
+ ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,Succ);
+ if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+
+ // If the edge is critical, split it.
+ bool wasInserted = SplitCriticalEdge(TI, s, this);
+ Succ = TI->getSuccessor(s);
+ if (wasInserted)
+ InsertedBlocks.insert(Succ);
+
+ // Okay, we are guaranteed that the edge is no longer critical. If
+ // we only have a single successor, insert the counter in this block,
+ // otherwise insert it in the successor block.
+ if (TI->getNumSuccessors() == 1) {
+ // Insert counter at the start of the block
+ printEdgeCounter(edge,BB,i);
+ IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
+ } else {
+ // Insert counter at the start of the block
+ printEdgeCounter(edge,Succ,i);
+ IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted;
+ }
+ Initializer[i++] = (Zero);
+ } else {
+ Initializer[i++] = (Uncounted);
+ }
+ }
+ }
+ }
+
+ // Check if the number of edges counted at first was the number of edges we
+ // considered for instrumentation.
+ assert(i==NumEdges && "the number of edges in counting array is wrong");
+
+ // Assing the now completely defined initialiser to the array.
+ Constant *init = ConstantArray::get(ATy, Initializer);
+ Counters->setInitializer(init);
+
+ // Add the initialization call to main.
+ InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters);
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
new file mode 100644
index 0000000..1a30e9b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -0,0 +1,131 @@
+//===- ProfilingUtils.cpp - Helper functions shared by profilers ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a few helper functions which are used by profile
+// instrumentation code to instrument the code. This allows the profiler pass
+// to worry about *what* to insert, and these functions take care of *how* to do
+// it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ProfilingUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+
+void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
+ GlobalValue *Array) {
+ LLVMContext &Context = MainFn->getContext();
+ const Type *ArgVTy =
+ PointerType::getUnqual(Type::getInt8PtrTy(Context));
+ const PointerType *UIntPtr =
+ Type::getInt32PtrTy(Context);
+ Module &M = *MainFn->getParent();
+ Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context),
+ Type::getInt32Ty(Context),
+ ArgVTy, UIntPtr,
+ Type::getInt32Ty(Context),
+ (Type *)0);
+
+ // This could force argc and argv into programs that wouldn't otherwise have
+ // them, but instead we just pass null values in.
+ std::vector<Value*> Args(4);
+ Args[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Args[1] = Constant::getNullValue(ArgVTy);
+
+ // Skip over any allocas in the entry block.
+ BasicBlock *Entry = MainFn->begin();
+ BasicBlock::iterator InsertPos = Entry->begin();
+ while (isa<AllocaInst>(InsertPos)) ++InsertPos;
+
+ std::vector<Constant*> GEPIndices(2,
+ Constant::getNullValue(Type::getInt32Ty(Context)));
+ unsigned NumElements = 0;
+ if (Array) {
+ Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0],
+ GEPIndices.size());
+ NumElements =
+ cast<ArrayType>(Array->getType()->getElementType())->getNumElements();
+ } else {
+ // If this profiling instrumentation doesn't have a constant array, just
+ // pass null.
+ Args[2] = ConstantPointerNull::get(UIntPtr);
+ }
+ Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
+
+ CallInst *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
+ "newargc", InsertPos);
+
+ // If argc or argv are not available in main, just pass null values in.
+ Function::arg_iterator AI;
+ switch (MainFn->arg_size()) {
+ default:
+ case 2:
+ AI = MainFn->arg_begin(); ++AI;
+ if (AI->getType() != ArgVTy) {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
+ false);
+ InitCall->setArgOperand(1,
+ CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
+ } else {
+ InitCall->setArgOperand(1, AI);
+ }
+ /* FALL THROUGH */
+
+ case 1:
+ AI = MainFn->arg_begin();
+ // If the program looked at argc, have it look at the return value of the
+ // init call instead.
+ if (!AI->getType()->isIntegerTy(32)) {
+ Instruction::CastOps opcode;
+ if (!AI->use_empty()) {
+ opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);
+ AI->replaceAllUsesWith(
+ CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos));
+ }
+ opcode = CastInst::getCastOpcode(AI, true,
+ Type::getInt32Ty(Context), true);
+ InitCall->setArgOperand(0,
+ CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
+ "argc.cast", InitCall));
+ } else {
+ AI->replaceAllUsesWith(InitCall);
+ InitCall->setArgOperand(0, AI);
+ }
+
+ case 0: break;
+ }
+}
+
+void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
+ GlobalValue *CounterArray) {
+ // Insert the increment after any alloca or PHI instructions...
+ BasicBlock::iterator InsertPos = BB->getFirstNonPHI();
+ while (isa<AllocaInst>(InsertPos))
+ ++InsertPos;
+
+ LLVMContext &Context = BB->getContext();
+
+ // Create the getelementptr constant expression
+ std::vector<Constant*> Indices(2);
+ Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
+ Constant *ElementPtr =
+ ConstantExpr::getGetElementPtr(CounterArray, &Indices[0],
+ Indices.size());
+
+ // Load, increment and store the value back.
+ Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
+ Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ "NewFuncCounter", InsertPos);
+ new StoreInst(NewVal, ElementPtr, InsertPos);
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
new file mode 100644
index 0000000..94efffe
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
@@ -0,0 +1,31 @@
+//===- ProfilingUtils.h - Helper functions shared by profilers --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a few helper functions which are used by profile
+// instrumentation code to instrument the code. This allows the profiler pass
+// to worry about *what* to insert, and these functions take care of *how* to do
+// it.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PROFILINGUTILS_H
+#define PROFILINGUTILS_H
+
+namespace llvm {
+ class Function;
+ class GlobalValue;
+ class BasicBlock;
+
+ void InsertProfilingInitCall(Function *MainFn, const char *FnName,
+ GlobalValue *Arr = 0);
+ void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
+ GlobalValue *CounterArray);
+}
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/Makefile b/contrib/llvm/lib/Transforms/Makefile
new file mode 100644
index 0000000..e527be2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Makefile
@@ -0,0 +1,20 @@
+##===- lib/Transforms/Makefile -----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Hello
+
+include $(LEVEL)/Makefile.config
+
+# No support for plugins on windows targets
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW Minix))
+ PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS))
+endif
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
new file mode 100644
index 0000000..ada086e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -0,0 +1,95 @@
+//===- DCE.cpp - Code to perform dead code elimination --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Aggressive Dead Code Elimination pass. This pass
+// optimistically assumes that all instructions are dead until proven otherwise,
+// allowing it to eliminate dead computations that other DCE passes do not
+// catch, particularly involving loop computations.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "adce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumRemoved, "Number of instructions removed");
+
+namespace {
+ struct ADCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ADCE() : FunctionPass(ID) {}
+
+ virtual bool runOnFunction(Function& F);
+
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.setPreservesCFG();
+ }
+
+ };
+}
+
+char ADCE::ID = 0;
+INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false);
+
+bool ADCE::runOnFunction(Function& F) {
+ SmallPtrSet<Instruction*, 128> alive;
+ SmallVector<Instruction*, 128> worklist;
+
+ // Collect the set of "root" instructions that are known live.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (isa<TerminatorInst>(I.getInstructionIterator()) ||
+ isa<DbgInfoIntrinsic>(I.getInstructionIterator()) ||
+ I->mayHaveSideEffects()) {
+ alive.insert(I.getInstructionIterator());
+ worklist.push_back(I.getInstructionIterator());
+ }
+
+ // Propagate liveness backwards to operands.
+ while (!worklist.empty()) {
+ Instruction* curr = worklist.pop_back_val();
+
+ for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end();
+ OI != OE; ++OI)
+ if (Instruction* Inst = dyn_cast<Instruction>(OI))
+ if (alive.insert(Inst))
+ worklist.push_back(Inst);
+ }
+
+ // The inverse of the live set is the dead set. These are those instructions
+ // which have no side effects and do not influence the control flow or return
+ // value of the function, and may therefore be deleted safely.
+ // NOTE: We reuse the worklist vector here for memory efficiency.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (!alive.count(I.getInstructionIterator())) {
+ worklist.push_back(I.getInstructionIterator());
+ I->dropAllReferences();
+ }
+
+ for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(),
+ E = worklist.end(); I != E; ++I) {
+ ++NumRemoved;
+ (*I)->eraseFromParent();
+ }
+
+ return !worklist.empty();
+}
+
+FunctionPass *llvm::createAggressiveDCEPass() {
+ return new ADCE();
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
new file mode 100644
index 0000000..b144678
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -0,0 +1,147 @@
+//===-- BasicBlockPlacement.cpp - Basic Block Code Layout optimization ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a very simple profile guided basic block placement
+// algorithm. The idea is to put frequently executed blocks together at the
+// start of the function, and hopefully increase the number of fall-through
+// conditional branches. If there is no profile information for a particular
+// function, this pass basically orders blocks in depth-first order
+//
+// The algorithm implemented here is basically "Algo1" from "Profile Guided Code
+// Positioning" by Pettis and Hansen, except that it uses basic block counts
+// instead of edge counts. This should be improved in many ways, but is very
+// simple for now.
+//
+// Basically we "place" the entry block, then loop over all successors in a DFO,
+// placing the most frequently executed successor until we run out of blocks. I
+// told you this was _extremely_ simplistic. :) This is also much slower than it
+// could be. When it becomes important, this pass will be rewritten to use a
+// better algorithm, and then we can worry about efficiency.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "block-placement"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Scalar.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumMoved, "Number of basic blocks moved");
+
+namespace {
+ struct BlockPlacement : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BlockPlacement() : FunctionPass(ID) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<ProfileInfo>();
+ //AU.addPreserved<ProfileInfo>(); // Does this work?
+ }
+ private:
+ /// PI - The profile information that is guiding us.
+ ///
+ ProfileInfo *PI;
+
+ /// NumMovedBlocks - Every time we move a block, increment this counter.
+ ///
+ unsigned NumMovedBlocks;
+
+ /// PlacedBlocks - Every time we place a block, remember it so we don't get
+ /// into infinite loops.
+ std::set<BasicBlock*> PlacedBlocks;
+
+ /// InsertPos - This an iterator to the next place we want to insert a
+ /// block.
+ Function::iterator InsertPos;
+
+ /// PlaceBlocks - Recursively place the specified blocks and any unplaced
+ /// successors.
+ void PlaceBlocks(BasicBlock *BB);
+ };
+}
+
+char BlockPlacement::ID = 0;
+INITIALIZE_PASS(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false);
+
+FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
+
+bool BlockPlacement::runOnFunction(Function &F) {
+ PI = &getAnalysis<ProfileInfo>();
+
+ NumMovedBlocks = 0;
+ InsertPos = F.begin();
+
+ // Recursively place all blocks.
+ PlaceBlocks(F.begin());
+
+ PlacedBlocks.clear();
+ NumMoved += NumMovedBlocks;
+ return NumMovedBlocks != 0;
+}
+
+
+/// PlaceBlocks - Recursively place the specified blocks and any unplaced
+/// successors.
+void BlockPlacement::PlaceBlocks(BasicBlock *BB) {
+ assert(!PlacedBlocks.count(BB) && "Already placed this block!");
+ PlacedBlocks.insert(BB);
+
+ // Place the specified block.
+ if (&*InsertPos != BB) {
+ // Use splice to move the block into the right place. This avoids having to
+ // remove the block from the function then readd it, which causes a bunch of
+ // symbol table traffic that is entirely pointless.
+ Function::BasicBlockListType &Blocks = BB->getParent()->getBasicBlockList();
+ Blocks.splice(InsertPos, Blocks, BB);
+
+ ++NumMovedBlocks;
+ } else {
+ // This block is already in the right place, we don't have to do anything.
+ ++InsertPos;
+ }
+
+ // Keep placing successors until we run out of ones to place. Note that this
+ // loop is very inefficient (N^2) for blocks with many successors, like switch
+ // statements. FIXME!
+ while (1) {
+ // Okay, now place any unplaced successors.
+ succ_iterator SI = succ_begin(BB), E = succ_end(BB);
+
+ // Scan for the first unplaced successor.
+ for (; SI != E && PlacedBlocks.count(*SI); ++SI)
+ /*empty*/;
+ if (SI == E) return; // No more successors to place.
+
+ double MaxExecutionCount = PI->getExecutionCount(*SI);
+ BasicBlock *MaxSuccessor = *SI;
+
+ // Scan for more frequently executed successors
+ for (; SI != E; ++SI)
+ if (!PlacedBlocks.count(*SI)) {
+ double Count = PI->getExecutionCount(*SI);
+ if (Count > MaxExecutionCount ||
+ // Prefer to not disturb the code.
+ (Count == MaxExecutionCount && *SI == &*InsertPos)) {
+ MaxExecutionCount = Count;
+ MaxSuccessor = *SI;
+ }
+ }
+
+ // Now that we picked the maximally executed successor, place it.
+ PlaceBlocks(MaxSuccessor);
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/CMakeLists.txt b/contrib/llvm/lib/Transforms/Scalar/CMakeLists.txt
new file mode 100644
index 0000000..b7598ea
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -0,0 +1,35 @@
+add_llvm_library(LLVMScalarOpts
+ ADCE.cpp
+ BasicBlockPlacement.cpp
+ CodeGenPrepare.cpp
+ ConstantProp.cpp
+ CorrelatedValuePropagation.cpp
+ DCE.cpp
+ DeadStoreElimination.cpp
+ GEPSplitter.cpp
+ GVN.cpp
+ IndVarSimplify.cpp
+ JumpThreading.cpp
+ LICM.cpp
+ LoopDeletion.cpp
+ LoopIndexSplit.cpp
+ LoopRotation.cpp
+ LoopStrengthReduce.cpp
+ LoopUnrollPass.cpp
+ LoopUnswitch.cpp
+ LowerAtomic.cpp
+ MemCpyOptimizer.cpp
+ Reassociate.cpp
+ Reg2Mem.cpp
+ SCCP.cpp
+ Scalar.cpp
+ ScalarReplAggregates.cpp
+ SimplifyCFGPass.cpp
+ SimplifyHalfPowrLibCalls.cpp
+ SimplifyLibCalls.cpp
+ Sink.cpp
+ TailDuplication.cpp
+ TailRecursionElimination.cpp
+ )
+
+target_link_libraries (LLVMScalarOpts LLVMTransformUtils)
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
new file mode 100644
index 0000000..e07b761
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -0,0 +1,981 @@
+//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass munges the code in the input function to better prepare it for
+// SelectionDAG-based code generation. This works around limitations in it's
+// basic-block-at-a-time approach. It should eventually be removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegenprepare"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/IRBuilder.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+static cl::opt<bool>
+CriticalEdgeSplit("cgp-critical-edge-splitting",
+ cl::desc("Split critical edges during codegen prepare"),
+ cl::init(true), cl::Hidden);
+
+namespace {
+ class CodeGenPrepare : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// transformation profitability.
+ const TargetLowering *TLI;
+ ProfileInfo *PFI;
+
+ /// BackEdges - Keep a set of all the loop back edges.
+ ///
+ SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit CodeGenPrepare(const TargetLowering *tli = 0)
+ : FunctionPass(ID), TLI(tli) {}
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<ProfileInfo>();
+ }
+
+ virtual void releaseMemory() {
+ BackEdges.clear();
+ }
+
+ private:
+ bool EliminateMostlyEmptyBlocks(Function &F);
+ bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
+ void EliminateMostlyEmptyBlock(BasicBlock *BB);
+ bool OptimizeBlock(BasicBlock &BB);
+ bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy,
+ DenseMap<Value*,Value*> &SunkAddrs);
+ bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
+ DenseMap<Value*,Value*> &SunkAddrs);
+ bool OptimizeCallInst(CallInst *CI);
+ bool MoveExtToFormExtLoad(Instruction *I);
+ bool OptimizeExtUses(Instruction *I);
+ void findLoopBackEdges(const Function &F);
+ };
+}
+
+char CodeGenPrepare::ID = 0;
+INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false);
+
+FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
+ return new CodeGenPrepare(TLI);
+}
+
+/// findLoopBackEdges - Do a DFS walk to find loop back edges.
+///
+void CodeGenPrepare::findLoopBackEdges(const Function &F) {
+ SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
+ FindFunctionBackedges(F, Edges);
+
+ BackEdges.insert(Edges.begin(), Edges.end());
+}
+
+
+bool CodeGenPrepare::runOnFunction(Function &F) {
+ bool EverMadeChange = false;
+
+ PFI = getAnalysisIfAvailable<ProfileInfo>();
+ // First pass, eliminate blocks that contain only PHI nodes and an
+ // unconditional branch.
+ EverMadeChange |= EliminateMostlyEmptyBlocks(F);
+
+ // Now find loop back edges.
+ findLoopBackEdges(F);
+
+ bool MadeChange = true;
+ while (MadeChange) {
+ MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ MadeChange |= OptimizeBlock(*BB);
+ EverMadeChange |= MadeChange;
+ }
+ return EverMadeChange;
+}
+
+/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
+/// debug info directives, and an unconditional branch. Passes before isel
+/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
+/// isel. Start by eliminating these blocks so we can split them the way we
+/// want them.
+bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
+ bool MadeChange = false;
+ // Note that this intentionally skips the entry block.
+ for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+
+ // If this block doesn't end with an uncond branch, ignore it.
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isUnconditional())
+ continue;
+
+ // If the instruction before the branch (skipping debug info) isn't a phi
+ // node, then other stuff is happening here.
+ BasicBlock::iterator BBI = BI;
+ if (BBI != BB->begin()) {
+ --BBI;
+ while (isa<DbgInfoIntrinsic>(BBI)) {
+ if (BBI == BB->begin())
+ break;
+ --BBI;
+ }
+ if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
+ continue;
+ }
+
+ // Do not break infinite loops.
+ BasicBlock *DestBB = BI->getSuccessor(0);
+ if (DestBB == BB)
+ continue;
+
+ if (!CanMergeBlocks(BB, DestBB))
+ continue;
+
+ EliminateMostlyEmptyBlock(BB);
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
+/// single uncond branch between them, and BB contains no other non-phi
+/// instructions.
+bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
+ const BasicBlock *DestBB) const {
+ // We only want to eliminate blocks whose phi nodes are used by phi nodes in
+ // the successor. If there are more complex condition (e.g. preheaders),
+ // don't mess around with them.
+ BasicBlock::const_iterator BBI = BB->begin();
+ while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
+ for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ const Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != DestBB || !isa<PHINode>(User))
+ return false;
+ // If User is inside DestBB block and it is a PHINode then check
+ // incoming value. If incoming value is not from BB then this is
+ // a complex condition (e.g. preheaders) we want to avoid here.
+ if (User->getParent() == DestBB) {
+ if (const PHINode *UPN = dyn_cast<PHINode>(User))
+ for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
+ Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
+ if (Insn && Insn->getParent() == BB &&
+ Insn->getParent() != UPN->getIncomingBlock(I))
+ return false;
+ }
+ }
+ }
+ }
+
+ // If BB and DestBB contain any common predecessors, then the phi nodes in BB
+ // and DestBB may have conflicting incoming values for the block. If so, we
+ // can't merge the block.
+ const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
+ if (!DestBBPN) return true; // no conflict.
+
+ // Collect the preds of BB.
+ SmallPtrSet<const BasicBlock*, 16> BBPreds;
+ if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+ // It is faster to get preds from a PHI than with pred_iterator.
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+ BBPreds.insert(BBPN->getIncomingBlock(i));
+ } else {
+ BBPreds.insert(pred_begin(BB), pred_end(BB));
+ }
+
+ // Walk the preds of DestBB.
+ for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
+ if (BBPreds.count(Pred)) { // Common predecessor?
+ BBI = DestBB->begin();
+ while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
+ const Value *V1 = PN->getIncomingValueForBlock(Pred);
+ const Value *V2 = PN->getIncomingValueForBlock(BB);
+
+ // If V2 is a phi node in BB, look up what the mapped value will be.
+ if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
+ if (V2PN->getParent() == BB)
+ V2 = V2PN->getIncomingValueForBlock(Pred);
+
+ // If there is a conflict, bail out.
+ if (V1 != V2) return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+
+/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and
+/// an unconditional branch in it.
+void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
+ BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+ BasicBlock *DestBB = BI->getSuccessor(0);
+
+ DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
+
+ // If the destination block has a single pred, then this is a trivial edge,
+ // just collapse it.
+ if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
+ if (SinglePred != DestBB) {
+ // Remember if SinglePred was the entry block of the function. If so, we
+ // will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(DestBB, this);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+ DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+ return;
+ }
+ }
+
+ // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
+ // to handle the new incoming edges it is about to have.
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = DestBB->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ // Remove the incoming value for BB, and remember it.
+ Value *InVal = PN->removeIncomingValue(BB, false);
+
+ // Two options: either the InVal is a phi node defined in BB or it is some
+ // value that dominates BB.
+ PHINode *InValPhi = dyn_cast<PHINode>(InVal);
+ if (InValPhi && InValPhi->getParent() == BB) {
+ // Add all of the input values of the input PHI as inputs of this phi.
+ for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
+ PN->addIncoming(InValPhi->getIncomingValue(i),
+ InValPhi->getIncomingBlock(i));
+ } else {
+ // Otherwise, add one instance of the dominating value for each edge that
+ // we will be adding.
+ if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+ PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
+ } else {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ PN->addIncoming(InVal, *PI);
+ }
+ }
+ }
+
+ // The PHIs are now updated, change everything that refers to BB to use
+ // DestBB and remove BB.
+ BB->replaceAllUsesWith(DestBB);
+ if (PFI) {
+ PFI->replaceAllUses(BB, DestBB);
+ PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
+ }
+ BB->eraseFromParent();
+
+ DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+}
+
+/// FindReusablePredBB - Check all of the predecessors of the block DestPHI
+/// lives in to see if there is a block that we can reuse as a critical edge
+/// from TIBB.
+static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) {
+ BasicBlock *Dest = DestPHI->getParent();
+
+ /// TIPHIValues - This array is lazily computed to determine the values of
+ /// PHIs in Dest that TI would provide.
+ SmallVector<Value*, 32> TIPHIValues;
+
+ /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB.
+ unsigned TIBBEntryNo = 0;
+
+ // Check to see if Dest has any blocks that can be used as a split edge for
+ // this terminator.
+ for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) {
+ BasicBlock *Pred = DestPHI->getIncomingBlock(pi);
+ // To be usable, the pred has to end with an uncond branch to the dest.
+ BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (!PredBr || !PredBr->isUnconditional())
+ continue;
+ // Must be empty other than the branch and debug info.
+ BasicBlock::iterator I = Pred->begin();
+ while (isa<DbgInfoIntrinsic>(I))
+ I++;
+ if (&*I != PredBr)
+ continue;
+ // Cannot be the entry block; its label does not get emitted.
+ if (Pred == &Dest->getParent()->getEntryBlock())
+ continue;
+
+ // Finally, since we know that Dest has phi nodes in it, we have to make
+ // sure that jumping to Pred will have the same effect as going to Dest in
+ // terms of PHI values.
+ PHINode *PN;
+ unsigned PHINo = 0;
+ unsigned PredEntryNo = pi;
+
+ bool FoundMatch = true;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
+ if (PHINo == TIPHIValues.size()) {
+ if (PN->getIncomingBlock(TIBBEntryNo) != TIBB)
+ TIBBEntryNo = PN->getBasicBlockIndex(TIBB);
+ TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo));
+ }
+
+ // If the PHI entry doesn't work, we can't use this pred.
+ if (PN->getIncomingBlock(PredEntryNo) != Pred)
+ PredEntryNo = PN->getBasicBlockIndex(Pred);
+
+ if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) {
+ FoundMatch = false;
+ break;
+ }
+ }
+
+ // If we found a workable predecessor, change TI to branch to Succ.
+ if (FoundMatch)
+ return Pred;
+ }
+ return 0;
+}
+
+
+/// SplitEdgeNicely - Split the critical edge from TI to its specified
+/// successor if it will improve codegen. We only do this if the successor has
+/// phi nodes (otherwise critical edges are ok). If there is already another
+/// predecessor of the succ that is empty (and thus has no phi nodes), use it
+/// instead of introducing a new block.
+static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
+ SmallSet<std::pair<const BasicBlock*,
+ const BasicBlock*>, 8> &BackEdges,
+ Pass *P) {
+ BasicBlock *TIBB = TI->getParent();
+ BasicBlock *Dest = TI->getSuccessor(SuccNum);
+ assert(isa<PHINode>(Dest->begin()) &&
+ "This should only be called if Dest has a PHI!");
+ PHINode *DestPHI = cast<PHINode>(Dest->begin());
+
+ // Do not split edges to EH landing pads.
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI))
+ if (Invoke->getSuccessor(1) == Dest)
+ return;
+
+ // As a hack, never split backedges of loops. Even though the copy for any
+ // PHIs inserted on the backedge would be dead for exits from the loop, we
+ // assume that the cost of *splitting* the backedge would be too high.
+ if (BackEdges.count(std::make_pair(TIBB, Dest)))
+ return;
+
+ if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) {
+ ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>();
+ if (PFI)
+ PFI->splitEdge(TIBB, Dest, ReuseBB);
+ Dest->removePredecessor(TIBB);
+ TI->setSuccessor(SuccNum, ReuseBB);
+ return;
+ }
+
+ SplitCriticalEdge(TI, SuccNum, P, true);
+}
+
+
+/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
+/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
+/// sink it into user blocks to reduce the number of virtual
+/// registers that must be created and coalesced.
+///
+/// Return true if any changes are made.
+///
+static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
+ // If this is a noop copy,
+ EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(CI->getType());
+
+ // This is an fp<->int conversion?
+ if (SrcVT.isInteger() != DstVT.isInteger())
+ return false;
+
+ // If this is an extension, it will be a zero or sign extension, which
+ // isn't a noop.
+ if (SrcVT.bitsLT(DstVT)) return false;
+
+ // If these values will be promoted, find out what they will be promoted
+ // to. This helps us consider truncates on PPC as noop copies when they
+ // are.
+ if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
+ SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
+ if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
+ DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
+
+ // If, after promotion, these are the same types, this is a noop copy.
+ if (SrcVT != DstVT)
+ return false;
+
+ BasicBlock *DefBB = CI->getParent();
+
+ /// InsertedCasts - Only insert a cast in each block once.
+ DenseMap<BasicBlock*, CastInst*> InsertedCasts;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this cast is used in. For PHI's this is the
+ // appropriate predecessor block.
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ UserBB = PN->getIncomingBlock(UI);
+ }
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // If this user is in the same block as the cast, don't change the cast.
+ if (UserBB == DefBB) continue;
+
+ // If we have already inserted a cast into this block, use it.
+ CastInst *&InsertedCast = InsertedCasts[UserBB];
+
+ if (!InsertedCast) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
+
+ InsertedCast =
+ CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
+ InsertPt);
+ MadeChange = true;
+ }
+
+ // Replace a use of the cast with a use of the new cast.
+ TheUse = InsertedCast;
+ }
+
+ // If we removed all uses, nuke the cast.
+ if (CI->use_empty()) {
+ CI->eraseFromParent();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// the number of virtual registers that must be created and coalesced. This is
+/// a clear win except on targets with multiple condition code registers
+/// (PowerPC), where it might lose; some adjustment may be wanted there.
+///
+/// Return true if any changes are made.
+static bool OptimizeCmpExpression(CmpInst *CI) {
+ BasicBlock *DefBB = CI->getParent();
+
+ /// InsertedCmp - Only insert a cmp in each block once.
+ DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(User))
+ continue;
+
+ // Figure out which BB this cmp is used in.
+ BasicBlock *UserBB = User->getParent();
+
+ // If this user is in the same block as the cmp, don't change the cmp.
+ if (UserBB == DefBB) continue;
+
+ // If we have already inserted a cmp into this block, use it.
+ CmpInst *&InsertedCmp = InsertedCmps[UserBB];
+
+ if (!InsertedCmp) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
+
+ InsertedCmp =
+ CmpInst::Create(CI->getOpcode(),
+ CI->getPredicate(), CI->getOperand(0),
+ CI->getOperand(1), "", InsertPt);
+ MadeChange = true;
+ }
+
+ // Replace a use of the cmp with a use of the new cmp.
+ TheUse = InsertedCmp;
+ }
+
+ // If we removed all uses, nuke the cmp.
+ if (CI->use_empty())
+ CI->eraseFromParent();
+
+ return MadeChange;
+}
+
+namespace {
+class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
+protected:
+ void replaceCall(Value *With) {
+ CI->replaceAllUsesWith(With);
+ CI->eraseFromParent();
+ }
+ bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
+ return SizeCI->isAllOnesValue();
+ return false;
+ }
+};
+} // end anonymous namespace
+
+bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+ // Lower all uses of llvm.objectsize.*
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
+ if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
+ bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
+ const Type *ReturnTy = CI->getType();
+ Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+ CI->replaceAllUsesWith(RetVal);
+ CI->eraseFromParent();
+ return true;
+ }
+
+ // From here on out we're working with named functions.
+ if (CI->getCalledFunction() == 0) return false;
+
+ // We'll need TargetData from here on out.
+ const TargetData *TD = TLI ? TLI->getTargetData() : 0;
+ if (!TD) return false;
+
+ // Lower all default uses of _chk calls. This is very similar
+ // to what InstCombineCalls does, but here we are only lowering calls
+ // that have the default "don't know" as the objectsize. Anything else
+ // should be left alone.
+ CodeGenPrepareFortifiedLibCalls Simplifier;
+ return Simplifier.fold(CI, TD);
+}
+//===----------------------------------------------------------------------===//
+// Memory Optimization
+//===----------------------------------------------------------------------===//
+
+/// IsNonLocalValue - Return true if the specified values are defined in a
+/// different basic block than BB.
+static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() != BB;
+ return false;
+}
+
+/// OptimizeMemoryInst - Load and Store Instructions often have
+/// addressing modes that can do significant amounts of computation. As such,
+/// instruction selection will try to get the load or store to do as much
+/// computation as possible for the program. The problem is that isel can only
+/// see within a single block. As such, we sink as much legal addressing mode
+/// stuff into the block as possible.
+///
+/// This method is used to optimize both load/store and inline asms with memory
+/// operands.
+bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+ const Type *AccessTy,
+ DenseMap<Value*,Value*> &SunkAddrs) {
+ // Figure out what addressing mode will be built up for this operation.
+ SmallVector<Instruction*, 16> AddrModeInsts;
+ ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
+ AddrModeInsts, *TLI);
+
+ // Check to see if any of the instructions supersumed by this addr mode are
+ // non-local to I's BB.
+ bool AnyNonLocal = false;
+ for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
+ if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
+ AnyNonLocal = true;
+ break;
+ }
+ }
+
+ // If all the instructions matched are already in this BB, don't do anything.
+ if (!AnyNonLocal) {
+ DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
+ return false;
+ }
+
+ // Insert this computation right after this user. Since our caller is
+ // scanning from the top of the BB to the bottom, reuse of the expr are
+ // guaranteed to happen later.
+ BasicBlock::iterator InsertPt = MemoryInst;
+
+ // Now that we determined the addressing expression we want to use and know
+ // that we have to sink it into this block. Check to see if we have already
+ // done this for some other load/store instr in this block. If so, reuse the
+ // computation.
+ Value *&SunkAddr = SunkAddrs[Addr];
+ if (SunkAddr) {
+ DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst);
+ if (SunkAddr->getType() != Addr->getType())
+ SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
+ } else {
+ DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst);
+ const Type *IntPtrTy =
+ TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
+
+ Value *Result = 0;
+
+ // Start with the base register. Do this first so that subsequent address
+ // matching finds it last, which will prevent it from trying to match it
+ // as the scaled value in case it happens to be a mul. That would be
+ // problematic if we've sunk a different mul for the scale, because then
+ // we'd end up sinking both muls.
+ if (AddrMode.BaseReg) {
+ Value *V = AddrMode.BaseReg;
+ if (V->getType()->isPointerTy())
+ V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ if (V->getType() != IntPtrTy)
+ V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
+ "sunkaddr", InsertPt);
+ Result = V;
+ }
+
+ // Add the scale value.
+ if (AddrMode.Scale) {
+ Value *V = AddrMode.ScaledReg;
+ if (V->getType() == IntPtrTy) {
+ // done.
+ } else if (V->getType()->isPointerTy()) {
+ V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
+ cast<IntegerType>(V->getType())->getBitWidth()) {
+ V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ } else {
+ V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ }
+ if (AddrMode.Scale != 1)
+ V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
+ AddrMode.Scale),
+ "sunkaddr", InsertPt);
+ if (Result)
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ else
+ Result = V;
+ }
+
+ // Add in the BaseGV if present.
+ if (AddrMode.BaseGV) {
+ Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
+ InsertPt);
+ if (Result)
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ else
+ Result = V;
+ }
+
+ // Add in the Base Offset if present.
+ if (AddrMode.BaseOffs) {
+ Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+ if (Result)
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ else
+ Result = V;
+ }
+
+ if (Result == 0)
+ SunkAddr = Constant::getNullValue(Addr->getType());
+ else
+ SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
+ }
+
+ MemoryInst->replaceUsesOfWith(Addr, SunkAddr);
+
+ if (Addr->use_empty()) {
+ RecursivelyDeleteTriviallyDeadInstructions(Addr);
+ // This address is now available for reassignment, so erase the table entry;
+ // we don't want to match some completely different instruction.
+ SunkAddrs[Addr] = 0;
+ }
+ return true;
+}
+
+/// OptimizeInlineAsmInst - If there are any memory operands, use
+/// OptimizeMemoryInst to sink their address computing into the block when
+/// possible / profitable.
+bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
+ DenseMap<Value*,Value*> &SunkAddrs) {
+ bool MadeChange = false;
+ InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ std::vector<InlineAsm::ConstraintInfo>
+ ConstraintInfos = IA->ParseConstraints();
+
+ /// ConstraintOperands - Information about all of the constraints.
+ std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ ConstraintOperands.
+ push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
+ TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ if (OpInfo.isIndirect)
+ OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI->ComputeConstraintToUse(OpInfo, SDValue());
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.isIndirect) {
+ Value *OpVal = OpInfo.CallOperandVal;
+ MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
+ }
+ }
+
+ return MadeChange;
+}
+
+/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
+/// basic block as the load, unless conditions are unfavorable. This allows
+/// SelectionDAG to fold the extend into the load.
+///
+bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
+ // Look for a load being extended.
+ LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
+ if (!LI) return false;
+
+ // If they're already in the same block, there's nothing to do.
+ if (LI->getParent() == I->getParent())
+ return false;
+
+ // If the load has other users and the truncate is not free, this probably
+ // isn't worthwhile.
+ if (!LI->hasOneUse() &&
+ TLI && !TLI->isTruncateFree(I->getType(), LI->getType()))
+ return false;
+
+ // Check whether the target supports casts folded into loads.
+ unsigned LType;
+ if (isa<ZExtInst>(I))
+ LType = ISD::ZEXTLOAD;
+ else {
+ assert(isa<SExtInst>(I) && "Unexpected ext type!");
+ LType = ISD::SEXTLOAD;
+ }
+ if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
+ return false;
+
+ // Move the extend into the same block as the load, so that SelectionDAG
+ // can fold it.
+ I->removeFromParent();
+ I->insertAfter(LI);
+ return true;
+}
+
+bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
+ BasicBlock *DefBB = I->getParent();
+
+ // If both result of the {s|z}xt and its source are live out, rewrite all
+ // other uses of the source with result of extension.
+ Value *Src = I->getOperand(0);
+ if (Src->hasOneUse())
+ return false;
+
+ // Only do this xform if truncating is free.
+ if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
+ return false;
+
+ // Only safe to perform the optimization if the source is also defined in
+ // this block.
+ if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
+ return false;
+
+ bool DefIsLiveOut = false;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+ DefIsLiveOut = true;
+ break;
+ }
+ if (!DefIsLiveOut)
+ return false;
+
+ // Make sure non of the uses are PHI nodes.
+ for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+ // Be conservative. We don't want this xform to end up introducing
+ // reloads just before load / store instructions.
+ if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
+ return false;
+ }
+
+ // InsertedTruncs - Only insert one trunc in each block once.
+ DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
+
+ bool MadeChange = false;
+ for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
+ UI != E; ++UI) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+
+ // Both src and def are live in this block. Rewrite the use.
+ Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
+
+ if (!InsertedTrunc) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
+
+ InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
+ }
+
+ // Replace a use of the {s|z}ext source with a use of the result.
+ TheUse = InsertedTrunc;
+
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+// In this pass we look for GEP and cast instructions that are used
+// across basic blocks and rewrite them to improve basic-block-at-a-time
+// selection.
+bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
+ bool MadeChange = false;
+
+ // Split all critical edges where the dest block has a PHI.
+ if (CriticalEdgeSplit) {
+ TerminatorInst *BBTI = BB.getTerminator();
+ if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
+ for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *SuccBB = BBTI->getSuccessor(i);
+ if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
+ SplitEdgeNicely(BBTI, i, BackEdges, this);
+ }
+ }
+ }
+
+ // Keep track of non-local addresses that have been sunk into this block.
+ // This allows us to avoid inserting duplicate code for blocks with multiple
+ // load/stores of the same address.
+ DenseMap<Value*, Value*> SunkAddrs;
+
+ for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
+ Instruction *I = BBI++;
+
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // If the source of the cast is a constant, then this should have
+ // already been constant folded. The only reason NOT to constant fold
+ // it is if something (e.g. LSR) was careful to place the constant
+ // evaluation in a block other than then one that uses it (e.g. to hoist
+ // the address of globals out of a loop). If this is the case, we don't
+ // want to forward-subst the cast.
+ if (isa<Constant>(CI->getOperand(0)))
+ continue;
+
+ bool Change = false;
+ if (TLI) {
+ Change = OptimizeNoopCopyExpression(CI, *TLI);
+ MadeChange |= Change;
+ }
+
+ if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) {
+ MadeChange |= MoveExtToFormExtLoad(I);
+ MadeChange |= OptimizeExtUses(I);
+ }
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
+ MadeChange |= OptimizeCmpExpression(CI);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (TLI)
+ MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
+ SunkAddrs);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (TLI)
+ MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
+ SI->getOperand(0)->getType(),
+ SunkAddrs);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ if (GEPI->hasAllZeroIndices()) {
+ /// The GEP operand must be a pointer, so must its result -> BitCast
+ Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NC);
+ GEPI->eraseFromParent();
+ MadeChange = true;
+ BBI = NC;
+ }
+ } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ // If we found an inline asm expession, and if the target knows how to
+ // lower it to normal LLVM code, do so now.
+ if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
+ if (TLI->ExpandInlineAsm(CI)) {
+ BBI = BB.begin();
+ // Avoid processing instructions out of order, which could cause
+ // reuse before a value is defined.
+ SunkAddrs.clear();
+ } else
+ // Sink address computing for memory operands into the block.
+ MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
+ } else {
+ // Other CallInst optimizations that don't need to muck with the
+ // enclosing iterator here.
+ MadeChange |= OptimizeCallInst(CI);
+ }
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
new file mode 100644
index 0000000..a0ea369
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -0,0 +1,89 @@
+//===- ConstantProp.cpp - Code to perform Simple Constant Propagation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements constant propagation and merging:
+//
+// Specifically, this:
+// * Converts instructions like "add int 1, 2" into 3
+//
+// Notice that:
+// * This pass has a habit of making definitions be dead. It is a good idea
+// to run a DIE pass sometime after running this pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "constprop"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constant.h"
+#include "llvm/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInstKilled, "Number of instructions killed");
+
+namespace {
+ struct ConstantPropagation : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ConstantPropagation() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+}
+
+char ConstantPropagation::ID = 0;
+INITIALIZE_PASS(ConstantPropagation, "constprop",
+ "Simple constant propagation", false, false);
+
+FunctionPass *llvm::createConstantPropagationPass() {
+ return new ConstantPropagation();
+}
+
+
+bool ConstantPropagation::runOnFunction(Function &F) {
+ // Initialize the worklist to all of the instructions ready to process...
+ std::set<Instruction*> WorkList;
+ for(inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
+ WorkList.insert(&*i);
+ }
+ bool Changed = false;
+
+ while (!WorkList.empty()) {
+ Instruction *I = *WorkList.begin();
+ WorkList.erase(WorkList.begin()); // Get an element from the worklist...
+
+ if (!I->use_empty()) // Don't muck with dead instructions...
+ if (Constant *C = ConstantFoldInstruction(I)) {
+ // Add all of the users of this instruction to the worklist, they might
+ // be constant propagatable now...
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ WorkList.insert(cast<Instruction>(*UI));
+
+ // Replace all of the uses of a variable with uses of the constant.
+ I->replaceAllUsesWith(C);
+
+ // Remove the dead instruction.
+ WorkList.erase(I);
+ I->eraseFromParent();
+
+ // We made a change to the function...
+ Changed = true;
+ ++NumInstKilled;
+ }
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
new file mode 100644
index 0000000..0d4e45d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -0,0 +1,200 @@
+//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Correlated Value Propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "correlated-value-propagation"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPhis, "Number of phis propagated");
+STATISTIC(NumSelects, "Number of selects propagated");
+STATISTIC(NumMemAccess, "Number of memory access targets propagated");
+STATISTIC(NumCmps, "Number of comparisons propagated");
+
+namespace {
+ class CorrelatedValuePropagation : public FunctionPass {
+ LazyValueInfo *LVI;
+
+ bool processSelect(SelectInst *SI);
+ bool processPHI(PHINode *P);
+ bool processMemAccess(Instruction *I);
+ bool processCmp(CmpInst *C);
+
+ public:
+ static char ID;
+ CorrelatedValuePropagation(): FunctionPass(ID) { }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LazyValueInfo>();
+ }
+ };
+}
+
+char CorrelatedValuePropagation::ID = 0;
+INITIALIZE_PASS(CorrelatedValuePropagation, "correlated-propagation",
+ "Value Propagation", false, false);
+
+// Public interface to the Value Propagation pass
+Pass *llvm::createCorrelatedValuePropagationPass() {
+ return new CorrelatedValuePropagation();
+}
+
+bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
+ if (S->getType()->isVectorTy()) return false;
+ if (isa<Constant>(S->getOperand(0))) return false;
+
+ Constant *C = LVI->getConstant(S->getOperand(0), S->getParent());
+ if (!C) return false;
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI) return false;
+
+ S->replaceAllUsesWith(S->getOperand(CI->isOne() ? 1 : 2));
+ S->eraseFromParent();
+
+ ++NumSelects;
+
+ return true;
+}
+
+bool CorrelatedValuePropagation::processPHI(PHINode *P) {
+ bool Changed = false;
+
+ BasicBlock *BB = P->getParent();
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ Value *Incoming = P->getIncomingValue(i);
+ if (isa<Constant>(Incoming)) continue;
+
+ Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i),
+ P->getIncomingBlock(i),
+ BB);
+ if (!C) continue;
+
+ P->setIncomingValue(i, C);
+ Changed = true;
+ }
+
+ if (Value *ConstVal = P->hasConstantValue()) {
+ P->replaceAllUsesWith(ConstVal);
+ P->eraseFromParent();
+ Changed = true;
+ }
+
+ ++NumPhis;
+
+ return Changed;
+}
+
+bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
+ Value *Pointer = 0;
+ if (LoadInst *L = dyn_cast<LoadInst>(I))
+ Pointer = L->getPointerOperand();
+ else
+ Pointer = cast<StoreInst>(I)->getPointerOperand();
+
+ if (isa<Constant>(Pointer)) return false;
+
+ Constant *C = LVI->getConstant(Pointer, I->getParent());
+ if (!C) return false;
+
+ ++NumMemAccess;
+ I->replaceUsesOfWith(Pointer, C);
+ return true;
+}
+
+/// processCmp - If the value of this comparison could be determined locally,
+/// constant propagation would already have figured it out. Instead, walk
+/// the predecessors and statically evaluate the comparison based on information
+/// available on that edge. If a given static evaluation is true on ALL
+/// incoming edges, then it's true universally and we can simplify the compare.
+bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
+ Value *Op0 = C->getOperand(0);
+ if (isa<Instruction>(Op0) &&
+ cast<Instruction>(Op0)->getParent() == C->getParent())
+ return false;
+
+ Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
+ if (!Op1) return false;
+
+ pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
+ if (PI == PE) return false;
+
+ LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
+ C->getOperand(0), Op1, *PI, C->getParent());
+ if (Result == LazyValueInfo::Unknown) return false;
+
+ ++PI;
+ while (PI != PE) {
+ LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
+ C->getOperand(0), Op1, *PI, C->getParent());
+ if (Res != Result) return false;
+ ++PI;
+ }
+
+ ++NumCmps;
+
+ if (Result == LazyValueInfo::True)
+ C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
+ else
+ C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
+
+ C->eraseFromParent();
+
+ return true;
+}
+
+bool CorrelatedValuePropagation::runOnFunction(Function &F) {
+ LVI = &getAnalysis<LazyValueInfo>();
+
+ bool FnChanged = false;
+
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ bool BBChanged = false;
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
+ Instruction *II = BI++;
+ switch (II->getOpcode()) {
+ case Instruction::Select:
+ BBChanged |= processSelect(cast<SelectInst>(II));
+ break;
+ case Instruction::PHI:
+ BBChanged |= processPHI(cast<PHINode>(II));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ BBChanged |= processCmp(cast<CmpInst>(II));
+ break;
+ case Instruction::Load:
+ case Instruction::Store:
+ BBChanged |= processMemAccess(II);
+ break;
+ }
+ }
+
+ // Propagating correlated values might leave cruft around.
+ // Try to clean it up before we continue.
+ if (BBChanged)
+ SimplifyInstructionsInBlock(FI);
+
+ FnChanged |= BBChanged;
+ }
+
+ return FnChanged;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
new file mode 100644
index 0000000..87ea803
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -0,0 +1,132 @@
+//===- DCE.cpp - Code to perform dead code elimination --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead inst elimination and dead code elimination.
+//
+// Dead Inst Elimination performs a single pass over the function removing
+// instructions that are obviously dead. Dead Code Elimination is similar, but
+// it rechecks instructions that were used by removed instructions to see if
+// they are newly dead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
+STATISTIC(DCEEliminated, "Number of insts removed");
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ // DeadInstElimination pass implementation
+ //
+ struct DeadInstElimination : public BasicBlockPass {
+ static char ID; // Pass identification, replacement for typeid
+ DeadInstElimination() : BasicBlockPass(ID) {}
+ virtual bool runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
+ Instruction *Inst = DI++;
+ if (isInstructionTriviallyDead(Inst)) {
+ Inst->eraseFromParent();
+ Changed = true;
+ ++DIEEliminated;
+ }
+ }
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+}
+
+char DeadInstElimination::ID = 0;
+INITIALIZE_PASS(DeadInstElimination, "die",
+ "Dead Instruction Elimination", false, false);
+
+Pass *llvm::createDeadInstEliminationPass() {
+ return new DeadInstElimination();
+}
+
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ // DeadCodeElimination pass implementation
+ //
+ struct DCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ DCE() : FunctionPass(ID) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+}
+
+char DCE::ID = 0;
+INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false);
+
+bool DCE::runOnFunction(Function &F) {
+ // Start out with all of the instructions in the worklist...
+ std::vector<Instruction*> WorkList;
+ for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
+ WorkList.push_back(&*i);
+
+ // Loop over the worklist finding instructions that are dead. If they are
+ // dead make them drop all of their uses, making other instructions
+ // potentially dead, and work until the worklist is empty.
+ //
+ bool MadeChange = false;
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.back();
+ WorkList.pop_back();
+
+ if (isInstructionTriviallyDead(I)) { // If the instruction is dead.
+ // Loop over all of the values that the instruction uses, if there are
+ // instructions being used, add them to the worklist, because they might
+ // go dead after this one is removed.
+ //
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *Used = dyn_cast<Instruction>(*OI))
+ WorkList.push_back(Used);
+
+ // Remove the instruction.
+ I->eraseFromParent();
+
+ // Remove the instruction from the worklist if it still exists in it.
+ for (std::vector<Instruction*>::iterator WI = WorkList.begin();
+ WI != WorkList.end(); ) {
+ if (*WI == I)
+ WI = WorkList.erase(WI);
+ else
+ ++WI;
+ }
+
+ MadeChange = true;
+ ++DCEEliminated;
+ }
+ }
+ return MadeChange;
+}
+
+FunctionPass *llvm::createDeadCodeEliminationPass() {
+ return new DCE();
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
new file mode 100644
index 0000000..c8fd9d9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -0,0 +1,575 @@
+//===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a trivial dead store elimination that only considers
+// basic-block local redundant stores.
+//
+// FIXME: This should eventually be extended to be a post-dominator tree
+// traversal. Doing so would be pretty trivial.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dse"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumFastStores, "Number of stores deleted");
+STATISTIC(NumFastOther , "Number of other instrs removed");
+
+namespace {
+ struct DSE : public FunctionPass {
+ TargetData *TD;
+
+ static char ID; // Pass identification, replacement for typeid
+ DSE() : FunctionPass(ID) {}
+
+ virtual bool runOnFunction(Function &F) {
+ bool Changed = false;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ // Only check non-dead blocks. Dead blocks may have strange pointer
+ // cycles that will confuse alias analysis.
+ if (DT.isReachableFromEntry(I))
+ Changed |= runOnBasicBlock(*I);
+ return Changed;
+ }
+
+ bool runOnBasicBlock(BasicBlock &BB);
+ bool handleFreeWithNonTrivialDependency(const CallInst *F,
+ MemDepResult Dep);
+ bool handleEndBlock(BasicBlock &BB);
+ bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize,
+ BasicBlock::iterator &BBI,
+ SmallPtrSet<Value*, 64> &deadPointers);
+ void DeleteDeadInstruction(Instruction *I,
+ SmallPtrSet<Value*, 64> *deadPointers = 0);
+
+
+ // getAnalysisUsage - We require post dominance frontiers (aka Control
+ // Dependence Graph)
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<MemoryDependenceAnalysis>();
+ }
+
+ unsigned getPointerSize(Value *V) const;
+ };
+}
+
+char DSE::ID = 0;
+INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false);
+
+FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
+
+/// doesClobberMemory - Does this instruction clobber (write without reading)
+/// some memory?
+static bool doesClobberMemory(Instruction *I) {
+ if (isa<StoreInst>(I))
+ return true;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ case Intrinsic::init_trampoline:
+ case Intrinsic::lifetime_end:
+ return true;
+ }
+ }
+ return false;
+}
+
+/// isElidable - If the value of this instruction and the memory it writes to is
+/// unused, may we delete this instrtction?
+static bool isElidable(Instruction *I) {
+ assert(doesClobberMemory(I));
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ return II->getIntrinsicID() != Intrinsic::lifetime_end;
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return !SI->isVolatile();
+ return true;
+}
+
+/// getPointerOperand - Return the pointer that is being clobbered.
+static Value *getPointerOperand(Instruction *I) {
+ assert(doesClobberMemory(I));
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
+ return MI->getArgOperand(0);
+
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default: assert(false && "Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return II->getArgOperand(0);
+ case Intrinsic::lifetime_end:
+ return II->getArgOperand(1);
+ }
+}
+
+/// getStoreSize - Return the length in bytes of the write by the clobbering
+/// instruction. If variable or unknown, returns -1.
+static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
+ assert(doesClobberMemory(I));
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (!TD) return -1u;
+ return TD->getTypeStoreSize(SI->getOperand(0)->getType());
+ }
+
+ Value *Len;
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ Len = MI->getLength();
+ } else {
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default: assert(false && "Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return -1u;
+ case Intrinsic::lifetime_end:
+ Len = II->getArgOperand(0);
+ break;
+ }
+ }
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len))
+ if (!LenCI->isAllOnesValue())
+ return LenCI->getZExtValue();
+ return -1u;
+}
+
+/// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is
+/// greater than or equal to the store in I2. This returns false if we don't
+/// know.
+///
+static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2,
+ const TargetData *TD) {
+ const Type *I1Ty = getPointerOperand(I1)->getType();
+ const Type *I2Ty = getPointerOperand(I2)->getType();
+
+ // Exactly the same type, must have exactly the same size.
+ if (I1Ty == I2Ty) return true;
+
+ int I1Size = getStoreSize(I1, TD);
+ int I2Size = getStoreSize(I2, TD);
+
+ return I1Size != -1 && I2Size != -1 && I1Size >= I2Size;
+}
+
+bool DSE::runOnBasicBlock(BasicBlock &BB) {
+ MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ bool MadeChange = false;
+
+ // Do a top-down walk on the BB.
+ for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
+ Instruction *Inst = BBI++;
+
+ // If we find a store or a free, get its memory dependence.
+ if (!doesClobberMemory(Inst) && !isFreeCall(Inst))
+ continue;
+
+ MemDepResult InstDep = MD.getDependency(Inst);
+
+ // Ignore non-local stores.
+ // FIXME: cross-block DSE would be fun. :)
+ if (InstDep.isNonLocal()) continue;
+
+ // Handle frees whose dependencies are non-trivial.
+ if (const CallInst *F = isFreeCall(Inst)) {
+ MadeChange |= handleFreeWithNonTrivialDependency(F, InstDep);
+ continue;
+ }
+
+ // If not a definite must-alias dependency, ignore it.
+ if (!InstDep.isDef())
+ continue;
+
+ // If this is a store-store dependence, then the previous store is dead so
+ // long as this store is at least as big as it.
+ if (doesClobberMemory(InstDep.getInst())) {
+ Instruction *DepStore = InstDep.getInst();
+ if (isStoreAtLeastAsWideAs(Inst, DepStore, TD) &&
+ isElidable(DepStore)) {
+ // Delete the store and now-dead instructions that feed it.
+ DeleteDeadInstruction(DepStore);
+ ++NumFastStores;
+ MadeChange = true;
+
+ // DeleteDeadInstruction can delete the current instruction in loop
+ // cases, reset BBI.
+ BBI = Inst;
+ if (BBI != BB.begin())
+ --BBI;
+ continue;
+ }
+ }
+
+ if (!isElidable(Inst))
+ continue;
+
+ // If we're storing the same value back to a pointer that we just
+ // loaded from, then the store can be removed.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
+ if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
+ SI->getOperand(0) == DepLoad) {
+ // DeleteDeadInstruction can delete the current instruction. Save BBI
+ // in case we need it.
+ WeakVH NextInst(BBI);
+
+ DeleteDeadInstruction(SI);
+
+ if (NextInst == 0) // Next instruction deleted.
+ BBI = BB.begin();
+ else if (BBI != BB.begin()) // Revisit this instruction if possible.
+ --BBI;
+ ++NumFastStores;
+ MadeChange = true;
+ continue;
+ }
+ }
+ }
+
+ // If this is a lifetime end marker, we can throw away the store.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(InstDep.getInst())) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ // Delete the store and now-dead instructions that feed it.
+ // DeleteDeadInstruction can delete the current instruction. Save BBI
+ // in case we need it.
+ WeakVH NextInst(BBI);
+
+ DeleteDeadInstruction(Inst);
+
+ if (NextInst == 0) // Next instruction deleted.
+ BBI = BB.begin();
+ else if (BBI != BB.begin()) // Revisit this instruction if possible.
+ --BBI;
+ ++NumFastStores;
+ MadeChange = true;
+ continue;
+ }
+ }
+ }
+
+ // If this block ends in a return, unwind, or unreachable, all allocas are
+ // dead at its end, which means stores to them are also dead.
+ if (BB.getTerminator()->getNumSuccessors() == 0)
+ MadeChange |= handleEndBlock(BB);
+
+ return MadeChange;
+}
+
+/// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose
+/// dependency is a store to a field of that structure.
+bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
+ MemDepResult Dep) {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ Instruction *Dependency = Dep.getInst();
+ if (!Dependency || !doesClobberMemory(Dependency) || !isElidable(Dependency))
+ return false;
+
+ Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();
+
+ // Check for aliasing.
+ if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) !=
+ AliasAnalysis::MustAlias)
+ return false;
+
+ // DCE instructions only used to calculate that store
+ DeleteDeadInstruction(Dependency);
+ ++NumFastStores;
+ return true;
+}
+
+/// handleEndBlock - Remove dead stores to stack-allocated locations in the
+/// function end block. Ex:
+/// %A = alloca i32
+/// ...
+/// store i32 1, i32* %A
+/// ret void
+bool DSE::handleEndBlock(BasicBlock &BB) {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ bool MadeChange = false;
+
+ // Pointers alloca'd in this function are dead in the end block
+ SmallPtrSet<Value*, 64> deadPointers;
+
+ // Find all of the alloca'd pointers in the entry block.
+ BasicBlock *Entry = BB.getParent()->begin();
+ for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ deadPointers.insert(AI);
+
+ // Treat byval arguments the same, stores to them are dead at the end of the
+ // function.
+ for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
+ AE = BB.getParent()->arg_end(); AI != AE; ++AI)
+ if (AI->hasByValAttr())
+ deadPointers.insert(AI);
+
+ // Scan the basic block backwards
+ for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
+ --BBI;
+
+ // If we find a store whose pointer is dead.
+ if (doesClobberMemory(BBI)) {
+ if (isElidable(BBI)) {
+ // See through pointer-to-pointer bitcasts
+ Value *pointerOperand = getPointerOperand(BBI)->getUnderlyingObject();
+
+ // Alloca'd pointers or byval arguments (which are functionally like
+ // alloca's) are valid candidates for removal.
+ if (deadPointers.count(pointerOperand)) {
+ // DCE instructions only used to calculate that store.
+ Instruction *Dead = BBI;
+ ++BBI;
+ DeleteDeadInstruction(Dead, &deadPointers);
+ ++NumFastStores;
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ // Because a memcpy or memmove is also a load, we can't skip it if we
+ // didn't remove it.
+ if (!isa<MemTransferInst>(BBI))
+ continue;
+ }
+
+ Value *killPointer = 0;
+ uint64_t killPointerSize = ~0UL;
+
+ // If we encounter a use of the pointer, it is no longer considered dead
+ if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+ // However, if this load is unused and not volatile, we can go ahead and
+ // remove it, and not have to worry about it making our pointer undead!
+ if (L->use_empty() && !L->isVolatile()) {
+ ++BBI;
+ DeleteDeadInstruction(L, &deadPointers);
+ ++NumFastOther;
+ MadeChange = true;
+ continue;
+ }
+
+ killPointer = L->getPointerOperand();
+ } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
+ killPointer = V->getOperand(0);
+ } else if (isa<MemTransferInst>(BBI) &&
+ isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) {
+ killPointer = cast<MemTransferInst>(BBI)->getSource();
+ killPointerSize = cast<ConstantInt>(
+ cast<MemTransferInst>(BBI)->getLength())->getZExtValue();
+ } else if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
+ deadPointers.erase(A);
+
+ // Dead alloca's can be DCE'd when we reach them
+ if (A->use_empty()) {
+ ++BBI;
+ DeleteDeadInstruction(A, &deadPointers);
+ ++NumFastOther;
+ MadeChange = true;
+ }
+
+ continue;
+ } else if (CallSite CS = cast<Value>(BBI)) {
+ // If this call does not access memory, it can't
+ // be undeadifying any of our pointers.
+ if (AA.doesNotAccessMemory(CS))
+ continue;
+
+ unsigned modRef = 0;
+ unsigned other = 0;
+
+ // Remove any pointers made undead by the call from the dead set
+ std::vector<Value*> dead;
+ for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
+ E = deadPointers.end(); I != E; ++I) {
+ // HACK: if we detect that our AA is imprecise, it's not
+ // worth it to scan the rest of the deadPointers set. Just
+ // assume that the AA will return ModRef for everything, and
+ // go ahead and bail.
+ if (modRef >= 16 && other == 0) {
+ deadPointers.clear();
+ return MadeChange;
+ }
+
+ // See if the call site touches it
+ AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I,
+ getPointerSize(*I));
+
+ if (A == AliasAnalysis::ModRef)
+ ++modRef;
+ else
+ ++other;
+
+ if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
+ dead.push_back(*I);
+ }
+
+ for (std::vector<Value*>::iterator I = dead.begin(), E = dead.end();
+ I != E; ++I)
+ deadPointers.erase(*I);
+
+ continue;
+ } else if (isInstructionTriviallyDead(BBI)) {
+ // For any non-memory-affecting non-terminators, DCE them as we reach them
+ Instruction *Inst = BBI;
+ ++BBI;
+ DeleteDeadInstruction(Inst, &deadPointers);
+ ++NumFastOther;
+ MadeChange = true;
+ continue;
+ }
+
+ if (!killPointer)
+ continue;
+
+ killPointer = killPointer->getUnderlyingObject();
+
+ // Deal with undead pointers
+ MadeChange |= RemoveUndeadPointers(killPointer, killPointerSize, BBI,
+ deadPointers);
+ }
+
+ return MadeChange;
+}
+
+/// RemoveUndeadPointers - check for uses of a pointer that make it
+/// undead when scanning for dead stores to alloca's.
+bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize,
+ BasicBlock::iterator &BBI,
+ SmallPtrSet<Value*, 64> &deadPointers) {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ // If the kill pointer can be easily reduced to an alloca,
+ // don't bother doing extraneous AA queries.
+ if (deadPointers.count(killPointer)) {
+ deadPointers.erase(killPointer);
+ return false;
+ }
+
+ // A global can't be in the dead pointer set.
+ if (isa<GlobalValue>(killPointer))
+ return false;
+
+ bool MadeChange = false;
+
+ SmallVector<Value*, 16> undead;
+
+ for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
+ E = deadPointers.end(); I != E; ++I) {
+ // See if this pointer could alias it
+ AliasAnalysis::AliasResult A = AA.alias(*I, getPointerSize(*I),
+ killPointer, killPointerSize);
+
+ // If it must-alias and a store, we can delete it
+ if (isa<StoreInst>(BBI) && A == AliasAnalysis::MustAlias) {
+ StoreInst *S = cast<StoreInst>(BBI);
+
+ // Remove it!
+ ++BBI;
+ DeleteDeadInstruction(S, &deadPointers);
+ ++NumFastStores;
+ MadeChange = true;
+
+ continue;
+
+ // Otherwise, it is undead
+ } else if (A != AliasAnalysis::NoAlias)
+ undead.push_back(*I);
+ }
+
+ for (SmallVector<Value*, 16>::iterator I = undead.begin(), E = undead.end();
+ I != E; ++I)
+ deadPointers.erase(*I);
+
+ return MadeChange;
+}
+
+/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+/// If ValueSet is non-null, remove any deleted instructions from it as well.
+///
+void DSE::DeleteDeadInstruction(Instruction *I,
+ SmallPtrSet<Value*, 64> *ValueSet) {
+ SmallVector<Instruction*, 32> NowDeadInsts;
+
+ NowDeadInsts.push_back(I);
+ --NumFastOther;
+
+ // Before we touch this instruction, remove it from memdep!
+ MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
+ do {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
+
+ ++NumFastOther;
+
+ // This instruction is dead, zap it, in stages. Start by removing it from
+ // MemDep, which needs to know the operands and needs it to be in the
+ // function.
+ MDA.removeInstruction(DeadInst);
+
+ for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+ Value *Op = DeadInst->getOperand(op);
+ DeadInst->setOperand(op, 0);
+
+ // If this operand just became dead, add it to the NowDeadInsts list.
+ if (!Op->use_empty()) continue;
+
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ if (isInstructionTriviallyDead(OpI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+
+ if (ValueSet) ValueSet->erase(DeadInst);
+ } while (!NowDeadInsts.empty());
+}
+
+unsigned DSE::getPointerSize(Value *V) const {
+ if (TD) {
+ if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
+ // Get size information for the alloca
+ if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
+ return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
+ } else {
+ assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
+ const PointerType *PT = cast<PointerType>(V->getType());
+ return TD->getTypeAllocSize(PT->getElementType());
+ }
+ }
+ return ~0U;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp b/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp
new file mode 100644
index 0000000..53dd06d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp
@@ -0,0 +1,81 @@
+//===- GEPSplitter.cpp - Split complex GEPs into simple ones --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This function breaks GEPs with more than 2 non-zero operands into smaller
+// GEPs each with no more than 2 non-zero operands. This exposes redundancy
+// between GEPs with common initial operand sequences.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "split-geps"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ class GEPSplitter : public FunctionPass {
+ virtual bool runOnFunction(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit GEPSplitter() : FunctionPass(ID) {}
+ };
+}
+
+char GEPSplitter::ID = 0;
+INITIALIZE_PASS(GEPSplitter, "split-geps",
+ "split complex GEPs into simple GEPs", false, false);
+
+FunctionPass *llvm::createGEPSplitterPass() {
+ return new GEPSplitter();
+}
+
+bool GEPSplitter::runOnFunction(Function &F) {
+ bool Changed = false;
+
+ // Visit each GEP instruction.
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ for (BasicBlock::iterator II = I->begin(), IE = I->end(); II != IE; )
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(II++)) {
+ unsigned NumOps = GEP->getNumOperands();
+ // Ignore GEPs which are already simple.
+ if (NumOps <= 2)
+ continue;
+ bool FirstIndexIsZero = isa<ConstantInt>(GEP->getOperand(1)) &&
+ cast<ConstantInt>(GEP->getOperand(1))->isZero();
+ if (NumOps == 3 && FirstIndexIsZero)
+ continue;
+ // The first index is special and gets expanded with a 2-operand GEP
+ // (unless it's zero, in which case we can skip this).
+ Value *NewGEP = FirstIndexIsZero ?
+ GEP->getOperand(0) :
+ GetElementPtrInst::Create(GEP->getOperand(0), GEP->getOperand(1),
+ "tmp", GEP);
+ // All remaining indices get expanded with a 3-operand GEP with zero
+ // as the second operand.
+ Value *Idxs[2];
+ Idxs[0] = ConstantInt::get(Type::getInt64Ty(F.getContext()), 0);
+ for (unsigned i = 2; i != NumOps; ++i) {
+ Idxs[1] = GEP->getOperand(i);
+ NewGEP = GetElementPtrInst::Create(NewGEP, Idxs, Idxs+2, "tmp", GEP);
+ }
+ GEP->replaceAllUsesWith(NewGEP);
+ GEP->eraseFromParent();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+void GEPSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
new file mode 100644
index 0000000..c62ce1f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -0,0 +1,2319 @@
+//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global value numbering to eliminate fully redundant
+// instructions. It also performs simple dead load elimination.
+//
+// Note that this pass does the value numbering itself; it does not use the
+// ValueNumbering analysis passes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "gvn"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumGVNInstr, "Number of instructions deleted");
+STATISTIC(NumGVNLoad, "Number of loads deleted");
+STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
+STATISTIC(NumGVNBlocks, "Number of blocks merged");
+STATISTIC(NumPRELoad, "Number of loads PRE'd");
+
+static cl::opt<bool> EnablePRE("enable-pre",
+ cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
+static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false));
+
+//===----------------------------------------------------------------------===//
+// ValueTable Class
+//===----------------------------------------------------------------------===//
+
+/// This class holds the mapping between values and value numbers. It is used
+/// as an efficient mechanism to determine the expression-wise equivalence of
+/// two values.
+namespace {
+ struct Expression {
+ enum ExpressionOpcode {
+ ADD = Instruction::Add,
+ FADD = Instruction::FAdd,
+ SUB = Instruction::Sub,
+ FSUB = Instruction::FSub,
+ MUL = Instruction::Mul,
+ FMUL = Instruction::FMul,
+ UDIV = Instruction::UDiv,
+ SDIV = Instruction::SDiv,
+ FDIV = Instruction::FDiv,
+ UREM = Instruction::URem,
+ SREM = Instruction::SRem,
+ FREM = Instruction::FRem,
+ SHL = Instruction::Shl,
+ LSHR = Instruction::LShr,
+ ASHR = Instruction::AShr,
+ AND = Instruction::And,
+ OR = Instruction::Or,
+ XOR = Instruction::Xor,
+ TRUNC = Instruction::Trunc,
+ ZEXT = Instruction::ZExt,
+ SEXT = Instruction::SExt,
+ FPTOUI = Instruction::FPToUI,
+ FPTOSI = Instruction::FPToSI,
+ UITOFP = Instruction::UIToFP,
+ SITOFP = Instruction::SIToFP,
+ FPTRUNC = Instruction::FPTrunc,
+ FPEXT = Instruction::FPExt,
+ PTRTOINT = Instruction::PtrToInt,
+ INTTOPTR = Instruction::IntToPtr,
+ BITCAST = Instruction::BitCast,
+ ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
+ ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
+ FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
+ FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
+ FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
+ SHUFFLE, SELECT, GEP, CALL, CONSTANT,
+ INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
+
+ ExpressionOpcode opcode;
+ const Type* type;
+ SmallVector<uint32_t, 4> varargs;
+ Value *function;
+
+ Expression() { }
+ Expression(ExpressionOpcode o) : opcode(o) { }
+
+ bool operator==(const Expression &other) const {
+ if (opcode != other.opcode)
+ return false;
+ else if (opcode == EMPTY || opcode == TOMBSTONE)
+ return true;
+ else if (type != other.type)
+ return false;
+ else if (function != other.function)
+ return false;
+ else {
+ if (varargs.size() != other.varargs.size())
+ return false;
+
+ for (size_t i = 0; i < varargs.size(); ++i)
+ if (varargs[i] != other.varargs[i])
+ return false;
+
+ return true;
+ }
+ }
+
+ bool operator!=(const Expression &other) const {
+ return !(*this == other);
+ }
+ };
+
+ class ValueTable {
+ private:
+ DenseMap<Value*, uint32_t> valueNumbering;
+ DenseMap<Expression, uint32_t> expressionNumbering;
+ AliasAnalysis* AA;
+ MemoryDependenceAnalysis* MD;
+ DominatorTree* DT;
+
+ uint32_t nextValueNumber;
+
+ Expression::ExpressionOpcode getOpcode(CmpInst* C);
+ Expression create_expression(BinaryOperator* BO);
+ Expression create_expression(CmpInst* C);
+ Expression create_expression(ShuffleVectorInst* V);
+ Expression create_expression(ExtractElementInst* C);
+ Expression create_expression(InsertElementInst* V);
+ Expression create_expression(SelectInst* V);
+ Expression create_expression(CastInst* C);
+ Expression create_expression(GetElementPtrInst* G);
+ Expression create_expression(CallInst* C);
+ Expression create_expression(ExtractValueInst* C);
+ Expression create_expression(InsertValueInst* C);
+
+ uint32_t lookup_or_add_call(CallInst* C);
+ public:
+ ValueTable() : nextValueNumber(1) { }
+ uint32_t lookup_or_add(Value *V);
+ uint32_t lookup(Value *V) const;
+ void add(Value *V, uint32_t num);
+ void clear();
+ void erase(Value *v);
+ unsigned size();
+ void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
+ AliasAnalysis *getAliasAnalysis() const { return AA; }
+ void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
+ void setDomTree(DominatorTree* D) { DT = D; }
+ uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
+ void verifyRemoved(const Value *) const;
+ };
+}
+
+namespace llvm {
+template <> struct DenseMapInfo<Expression> {
+ static inline Expression getEmptyKey() {
+ return Expression(Expression::EMPTY);
+ }
+
+ static inline Expression getTombstoneKey() {
+ return Expression(Expression::TOMBSTONE);
+ }
+
+ static unsigned getHashValue(const Expression e) {
+ unsigned hash = e.opcode;
+
+ hash = ((unsigned)((uintptr_t)e.type >> 4) ^
+ (unsigned)((uintptr_t)e.type >> 9));
+
+ for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
+ E = e.varargs.end(); I != E; ++I)
+ hash = *I + hash * 37;
+
+ hash = ((unsigned)((uintptr_t)e.function >> 4) ^
+ (unsigned)((uintptr_t)e.function >> 9)) +
+ hash * 37;
+
+ return hash;
+ }
+ static bool isEqual(const Expression &LHS, const Expression &RHS) {
+ return LHS == RHS;
+ }
+};
+
+template <>
+struct isPodLike<Expression> { static const bool value = true; };
+
+}
+
+//===----------------------------------------------------------------------===//
+// ValueTable Internal Functions
+//===----------------------------------------------------------------------===//
+
+Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
+ if (isa<ICmpInst>(C)) {
+ switch (C->getPredicate()) {
+ default: // THIS SHOULD NEVER HAPPEN
+ llvm_unreachable("Comparison with unknown predicate?");
+ case ICmpInst::ICMP_EQ: return Expression::ICMPEQ;
+ case ICmpInst::ICMP_NE: return Expression::ICMPNE;
+ case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
+ case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
+ case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
+ case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
+ case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
+ case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
+ case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
+ case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
+ }
+ } else {
+ switch (C->getPredicate()) {
+ default: // THIS SHOULD NEVER HAPPEN
+ llvm_unreachable("Comparison with unknown predicate?");
+ case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
+ case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
+ case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
+ case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
+ case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
+ case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
+ case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
+ case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
+ case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
+ case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
+ case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
+ case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
+ case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
+ case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
+ }
+ }
+}
+
+Expression ValueTable::create_expression(CallInst* C) {
+ Expression e;
+
+ e.type = C->getType();
+ e.function = C->getCalledFunction();
+ e.opcode = Expression::CALL;
+
+ CallSite CS(C);
+ for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I)
+ e.varargs.push_back(lookup_or_add(*I));
+
+ return e;
+}
+
+Expression ValueTable::create_expression(BinaryOperator* BO) {
+ Expression e;
+ e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
+ e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
+ e.function = 0;
+ e.type = BO->getType();
+ e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode());
+
+ return e;
+}
+
+Expression ValueTable::create_expression(CmpInst* C) {
+ Expression e;
+
+ e.varargs.push_back(lookup_or_add(C->getOperand(0)));
+ e.varargs.push_back(lookup_or_add(C->getOperand(1)));
+ e.function = 0;
+ e.type = C->getType();
+ e.opcode = getOpcode(C);
+
+ return e;
+}
+
+Expression ValueTable::create_expression(CastInst* C) {
+ Expression e;
+
+ e.varargs.push_back(lookup_or_add(C->getOperand(0)));
+ e.function = 0;
+ e.type = C->getType();
+ e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode());
+
+ return e;
+}
+
+Expression ValueTable::create_expression(ShuffleVectorInst* S) {
+ Expression e;
+
+ e.varargs.push_back(lookup_or_add(S->getOperand(0)));
+ e.varargs.push_back(lookup_or_add(S->getOperand(1)));
+ e.varargs.push_back(lookup_or_add(S->getOperand(2)));
+ e.function = 0;
+ e.type = S->getType();
+ e.opcode = Expression::SHUFFLE;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(ExtractElementInst* E) {
+ Expression e;
+
+ e.varargs.push_back(lookup_or_add(E->getOperand(0)));
+ e.varargs.push_back(lookup_or_add(E->getOperand(1)));
+ e.function = 0;
+ e.type = E->getType();
+ e.opcode = Expression::EXTRACT;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(InsertElementInst* I) {
+ Expression e;
+
+ e.varargs.push_back(lookup_or_add(I->getOperand(0)));
+ e.varargs.push_back(lookup_or_add(I->getOperand(1)));
+ e.varargs.push_back(lookup_or_add(I->getOperand(2)));
+ e.function = 0;
+ e.type = I->getType();
+ e.opcode = Expression::INSERT;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(SelectInst* I) {
+ Expression e;
+
+ e.varargs.push_back(lookup_or_add(I->getCondition()));
+ e.varargs.push_back(lookup_or_add(I->getTrueValue()));
+ e.varargs.push_back(lookup_or_add(I->getFalseValue()));
+ e.function = 0;
+ e.type = I->getType();
+ e.opcode = Expression::SELECT;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(GetElementPtrInst* G) {
+ Expression e;
+
+ e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
+ e.function = 0;
+ e.type = G->getType();
+ e.opcode = Expression::GEP;
+
+ for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
+ I != E; ++I)
+ e.varargs.push_back(lookup_or_add(*I));
+
+ return e;
+}
+
+Expression ValueTable::create_expression(ExtractValueInst* E) {
+ Expression e;
+
+ e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
+ for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ e.function = 0;
+ e.type = E->getType();
+ e.opcode = Expression::EXTRACTVALUE;
+
+ return e;
+}
+
+Expression ValueTable::create_expression(InsertValueInst* E) {
+ Expression e;
+
+ e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
+ e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
+ for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ e.function = 0;
+ e.type = E->getType();
+ e.opcode = Expression::INSERTVALUE;
+
+ return e;
+}
+
+//===----------------------------------------------------------------------===//
+// ValueTable External Functions
+//===----------------------------------------------------------------------===//
+
+/// add - Insert a value into the table with a specified value number.
+void ValueTable::add(Value *V, uint32_t num) {
+ valueNumbering.insert(std::make_pair(V, num));
+}
+
+uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
+ if (AA->doesNotAccessMemory(C)) {
+ Expression exp = create_expression(C);
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ } else if (AA->onlyReadsMemory(C)) {
+ Expression exp = create_expression(C);
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ }
+ if (!MD) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ }
+
+ MemDepResult local_dep = MD->getDependency(C);
+
+ if (!local_dep.isDef() && !local_dep.isNonLocal()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ if (local_dep.isDef()) {
+ CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
+
+ if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+ uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
+ if (c_vn != cd_vn) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+ }
+
+ uint32_t v = lookup_or_add(local_cdep);
+ valueNumbering[C] = v;
+ return v;
+ }
+
+ // Non-local case.
+ const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
+ MD->getNonLocalCallDependency(CallSite(C));
+ // FIXME: call/call dependencies for readonly calls should return def, not
+ // clobber! Move the checking logic to MemDep!
+ CallInst* cdep = 0;
+
+ // Check to see if we have a single dominating call instruction that is
+ // identical to C.
+ for (unsigned i = 0, e = deps.size(); i != e; ++i) {
+ const NonLocalDepEntry *I = &deps[i];
+ // Ignore non-local dependencies.
+ if (I->getResult().isNonLocal())
+ continue;
+
+ // We don't handle non-depedencies. If we already have a call, reject
+ // instruction dependencies.
+ if (I->getResult().isClobber() || cdep != 0) {
+ cdep = 0;
+ break;
+ }
+
+ CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
+ // FIXME: All duplicated with non-local case.
+ if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
+ cdep = NonLocalDepCall;
+ continue;
+ }
+
+ cdep = 0;
+ break;
+ }
+
+ if (!cdep) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+ for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+ uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
+ if (c_vn != cd_vn) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+ }
+
+ uint32_t v = lookup_or_add(cdep);
+ valueNumbering[C] = v;
+ return v;
+
+ } else {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+}
+
+/// lookup_or_add - Returns the value number for the specified value, assigning
+/// it a new number if it did not have one before.
+uint32_t ValueTable::lookup_or_add(Value *V) {
+ DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+ if (VI != valueNumbering.end())
+ return VI->second;
+
+ if (!isa<Instruction>(V)) {
+ valueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ Instruction* I = cast<Instruction>(V);
+ Expression exp;
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ return lookup_or_add_call(cast<CallInst>(I));
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or :
+ case Instruction::Xor:
+ exp = create_expression(cast<BinaryOperator>(I));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ exp = create_expression(cast<CmpInst>(I));
+ break;
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ exp = create_expression(cast<CastInst>(I));
+ break;
+ case Instruction::Select:
+ exp = create_expression(cast<SelectInst>(I));
+ break;
+ case Instruction::ExtractElement:
+ exp = create_expression(cast<ExtractElementInst>(I));
+ break;
+ case Instruction::InsertElement:
+ exp = create_expression(cast<InsertElementInst>(I));
+ break;
+ case Instruction::ShuffleVector:
+ exp = create_expression(cast<ShuffleVectorInst>(I));
+ break;
+ case Instruction::ExtractValue:
+ exp = create_expression(cast<ExtractValueInst>(I));
+ break;
+ case Instruction::InsertValue:
+ exp = create_expression(cast<InsertValueInst>(I));
+ break;
+ case Instruction::GetElementPtr:
+ exp = create_expression(cast<GetElementPtrInst>(I));
+ break;
+ default:
+ valueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ valueNumbering[V] = e;
+ return e;
+}
+
+/// lookup - Returns the value number of the specified value. Fails if
+/// the value has not yet been numbered.
+uint32_t ValueTable::lookup(Value *V) const {
+ DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
+ assert(VI != valueNumbering.end() && "Value not numbered?");
+ return VI->second;
+}
+
+/// clear - Remove all entries from the ValueTable
+void ValueTable::clear() {
+ valueNumbering.clear();
+ expressionNumbering.clear();
+ nextValueNumber = 1;
+}
+
+/// erase - Remove a value from the value numbering
+void ValueTable::erase(Value *V) {
+ valueNumbering.erase(V);
+}
+
+/// verifyRemoved - Verify that the value is removed from all internal data
+/// structures.
+void ValueTable::verifyRemoved(const Value *V) const {
+ for (DenseMap<Value*, uint32_t>::const_iterator
+ I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
+ assert(I->first != V && "Inst still occurs in value numbering map!");
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// GVN Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+ struct ValueNumberScope {
+ ValueNumberScope* parent;
+ DenseMap<uint32_t, Value*> table;
+
+ ValueNumberScope(ValueNumberScope* p) : parent(p) { }
+ };
+}
+
+namespace {
+
+ class GVN : public FunctionPass {
+ bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit GVN(bool noloads = false)
+ : FunctionPass(ID), NoLoads(noloads), MD(0) { }
+
+ private:
+ bool NoLoads;
+ MemoryDependenceAnalysis *MD;
+ DominatorTree *DT;
+
+ ValueTable VN;
+ DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
+
+ // List of critical edges to be split between iterations.
+ SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
+
+ // This transformation requires dominator postdominator info
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ if (!NoLoads)
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<AliasAnalysis>();
+ }
+
+ // Helper fuctions
+ // FIXME: eliminate or document these better
+ bool processLoad(LoadInst* L,
+ SmallVectorImpl<Instruction*> &toErase);
+ bool processInstruction(Instruction *I,
+ SmallVectorImpl<Instruction*> &toErase);
+ bool processNonLocalLoad(LoadInst* L,
+ SmallVectorImpl<Instruction*> &toErase);
+ bool processBlock(BasicBlock *BB);
+ void dump(DenseMap<uint32_t, Value*>& d);
+ bool iterateOnFunction(Function &F);
+ Value *CollapsePhi(PHINode* p);
+ bool performPRE(Function& F);
+ Value *lookupNumber(BasicBlock *BB, uint32_t num);
+ void cleanupGlobalSets();
+ void verifyRemoved(const Instruction *I) const;
+ bool splitCriticalEdges();
+ };
+
+ char GVN::ID = 0;
+}
+
+// createGVNPass - The public interface to this file...
+FunctionPass *llvm::createGVNPass(bool NoLoads) {
+ return new GVN(NoLoads);
+}
+
+INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false);
+
+void GVN::dump(DenseMap<uint32_t, Value*>& d) {
+ errs() << "{\n";
+ for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
+ E = d.end(); I != E; ++I) {
+ errs() << I->first << "\n";
+ I->second->dump();
+ }
+ errs() << "}\n";
+}
+
+static bool isSafeReplacement(PHINode* p, Instruction *inst) {
+ if (!isa<PHINode>(inst))
+ return true;
+
+ for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
+ UI != E; ++UI)
+ if (PHINode* use_phi = dyn_cast<PHINode>(*UI))
+ if (use_phi->getParent() == inst->getParent())
+ return false;
+
+ return true;
+}
+
+Value *GVN::CollapsePhi(PHINode *PN) {
+ Value *ConstVal = PN->hasConstantValue(DT);
+ if (!ConstVal) return 0;
+
+ Instruction *Inst = dyn_cast<Instruction>(ConstVal);
+ if (!Inst)
+ return ConstVal;
+
+ if (DT->dominates(Inst, PN))
+ if (isSafeReplacement(PN, Inst))
+ return Inst;
+ return 0;
+}
+
+/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
+/// we're analyzing is fully available in the specified block. As we go, keep
+/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
+/// map is actually a tri-state map with the following values:
+/// 0) we know the block *is not* fully available.
+/// 1) we know the block *is* fully available.
+/// 2) we do not know whether the block is fully available or not, but we are
+/// currently speculating that it will be.
+/// 3) we are speculating for this block and have used that to speculate for
+/// other blocks.
+static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
+ DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
+ // Optimistically assume that the block is fully available and check to see
+ // if we already know about this block in one lookup.
+ std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
+ FullyAvailableBlocks.insert(std::make_pair(BB, 2));
+
+ // If the entry already existed for this block, return the precomputed value.
+ if (!IV.second) {
+ // If this is a speculative "available" value, mark it as being used for
+ // speculation of other blocks.
+ if (IV.first->second == 2)
+ IV.first->second = 3;
+ return IV.first->second != 0;
+ }
+
+ // Otherwise, see if it is fully available in all predecessors.
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ // If this block has no predecessors, it isn't live-in here.
+ if (PI == PE)
+ goto SpeculationFailure;
+
+ for (; PI != PE; ++PI)
+ // If the value isn't fully available in one of our predecessors, then it
+ // isn't fully available in this block either. Undo our previous
+ // optimistic assumption and bail out.
+ if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
+ goto SpeculationFailure;
+
+ return true;
+
+// SpeculationFailure - If we get here, we found out that this is not, after
+// all, a fully-available block. We have a problem if we speculated on this and
+// used the speculation to mark other blocks as available.
+SpeculationFailure:
+ char &BBVal = FullyAvailableBlocks[BB];
+
+ // If we didn't speculate on this, just return with it set to false.
+ if (BBVal == 2) {
+ BBVal = 0;
+ return false;
+ }
+
+ // If we did speculate on this value, we could have blocks set to 1 that are
+ // incorrect. Walk the (transitive) successors of this block and mark them as
+ // 0 if set to one.
+ SmallVector<BasicBlock*, 32> BBWorklist;
+ BBWorklist.push_back(BB);
+
+ do {
+ BasicBlock *Entry = BBWorklist.pop_back_val();
+ // Note that this sets blocks to 0 (unavailable) if they happen to not
+ // already be in FullyAvailableBlocks. This is safe.
+ char &EntryVal = FullyAvailableBlocks[Entry];
+ if (EntryVal == 0) continue; // Already unavailable.
+
+ // Mark as unavailable.
+ EntryVal = 0;
+
+ for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
+ BBWorklist.push_back(*I);
+ } while (!BBWorklist.empty());
+
+ return false;
+}
+
+
+/// CanCoerceMustAliasedValueToLoad - Return true if
+/// CoerceAvailableValueToLoadType will succeed.
+static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
+ const Type *LoadTy,
+ const TargetData &TD) {
+ // If the loaded or stored value is an first class array or struct, don't try
+ // to transform them. We need to be able to bitcast to integer.
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
+ StoredVal->getType()->isStructTy() ||
+ StoredVal->getType()->isArrayTy())
+ return false;
+
+ // The store has to be at least as big as the load.
+ if (TD.getTypeSizeInBits(StoredVal->getType()) <
+ TD.getTypeSizeInBits(LoadTy))
+ return false;
+
+ return true;
+}
+
+
+/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
+/// then a load from a must-aliased pointer of a different type, try to coerce
+/// the stored value. LoadedTy is the type of the load we want to replace and
+/// InsertPt is the place to insert new instructions.
+///
+/// If we can't do it, return null.
+static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
+ const Type *LoadedTy,
+ Instruction *InsertPt,
+ const TargetData &TD) {
+ if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
+ return 0;
+
+ const Type *StoredValTy = StoredVal->getType();
+
+ uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
+
+ // If the store and reload are the same size, we can always reuse it.
+ if (StoreSize == LoadSize) {
+ if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
+ // Pointer to Pointer -> use bitcast.
+ return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
+ }
+
+ // Convert source pointers to integers, which can be bitcast.
+ if (StoredValTy->isPointerTy()) {
+ StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+ StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ const Type *TypeToCastTo = LoadedTy;
+ if (TypeToCastTo->isPointerTy())
+ TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
+
+ if (StoredValTy != TypeToCastTo)
+ StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
+
+ // Cast to pointer if the load needs a pointer type.
+ if (LoadedTy->isPointerTy())
+ StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
+
+ return StoredVal;
+ }
+
+ // If the loaded value is smaller than the available value, then we can
+ // extract out a piece from it. If the available value is too small, then we
+ // can't do anything.
+ assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
+
+ // Convert source pointers to integers, which can be manipulated.
+ if (StoredValTy->isPointerTy()) {
+ StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+ StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ // Convert vectors and fp to integer, which can be manipulated.
+ if (!StoredValTy->isIntegerTy()) {
+ StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
+ StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
+ }
+
+ // If this is a big-endian system, we need to shift the value down to the low
+ // bits so that a truncate will work.
+ if (TD.isBigEndian()) {
+ Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
+ StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
+ }
+
+ // Truncate the integer to the right size now.
+ const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
+ StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
+
+ if (LoadedTy == NewIntTy)
+ return StoredVal;
+
+ // If the result is a pointer, inttoptr.
+ if (LoadedTy->isPointerTy())
+ return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
+
+ // Otherwise, bitcast.
+ return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
+}
+
+/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
+/// be expressed as a base pointer plus a constant offset. Return the base and
+/// offset to the caller.
+static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+ const TargetData &TD) {
+ Operator *PtrOp = dyn_cast<Operator>(Ptr);
+ if (PtrOp == 0) return Ptr;
+
+ // Just look through bitcasts.
+ if (PtrOp->getOpcode() == Instruction::BitCast)
+ return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
+
+ // If this is a GEP with constant indices, we can look through it.
+ GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
+ if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
+ ++I, ++GTI) {
+ ConstantInt *OpC = cast<ConstantInt>(*I);
+ if (OpC->isZero()) continue;
+
+ // Handle a struct and array indices which add their offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += OpC->getSExtValue()*Size;
+ }
+ }
+
+ // Re-sign extend from the pointer size if needed to get overflow edge cases
+ // right.
+ unsigned PtrSize = TD.getPointerSizeInBits();
+ if (PtrSize < 64)
+ Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
+
+ return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
+}
+
+
+/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering memory write (store,
+/// memset, memcpy, memmove). This means that the write *may* provide bits used
+/// by the load but we can't be sure because the pointers don't mustalias.
+///
+/// Check this case to see if there is anything more we can do before we give
+/// up. This returns -1 if we have to give up, or a byte number in the stored
+/// value of the piece that feeds the load.
+static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
+ Value *WritePtr,
+ uint64_t WriteSizeInBits,
+ const TargetData &TD) {
+ // If the loaded or stored value is an first class array or struct, don't try
+ // to transform them. We need to be able to bitcast to integer.
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy())
+ return -1;
+
+ int64_t StoreOffset = 0, LoadOffset = 0;
+ Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
+ Value *LoadBase =
+ GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
+ if (StoreBase != LoadBase)
+ return -1;
+
+ // If the load and store are to the exact same address, they should have been
+ // a must alias. AA must have gotten confused.
+ // FIXME: Study to see if/when this happens. One case is forwarding a memset
+ // to a load from the base of the memset.
+#if 0
+ if (LoadOffset == StoreOffset) {
+ dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
+ << "Base = " << *StoreBase << "\n"
+ << "Store Ptr = " << *WritePtr << "\n"
+ << "Store Offs = " << StoreOffset << "\n"
+ << "Load Ptr = " << *LoadPtr << "\n";
+ abort();
+ }
+#endif
+
+ // If the load and store don't overlap at all, the store doesn't provide
+ // anything to the load. In this case, they really don't alias at all, AA
+ // must have gotten confused.
+ // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
+ // remove this check, as it is duplicated with what we have below.
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
+
+ if ((WriteSizeInBits & 7) | (LoadSize & 7))
+ return -1;
+ uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes.
+ LoadSize >>= 3;
+
+
+ bool isAAFailure = false;
+ if (StoreOffset < LoadOffset)
+ isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
+ else
+ isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
+
+ if (isAAFailure) {
+#if 0
+ dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
+ << "Base = " << *StoreBase << "\n"
+ << "Store Ptr = " << *WritePtr << "\n"
+ << "Store Offs = " << StoreOffset << "\n"
+ << "Load Ptr = " << *LoadPtr << "\n";
+ abort();
+#endif
+ return -1;
+ }
+
+ // If the Load isn't completely contained within the stored bits, we don't
+ // have all the bits to feed it. We could do something crazy in the future
+ // (issue a smaller load then merge the bits in) but this seems unlikely to be
+ // valuable.
+ if (StoreOffset > LoadOffset ||
+ StoreOffset+StoreSize < LoadOffset+LoadSize)
+ return -1;
+
+ // Okay, we can do this transformation. Return the number of bytes into the
+ // store that the load is.
+ return LoadOffset-StoreOffset;
+}
+
+/// AnalyzeLoadFromClobberingStore - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store.
+static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
+ StoreInst *DepSI,
+ const TargetData &TD) {
+ // Cannot handle reading from store of first-class aggregate yet.
+ if (DepSI->getOperand(0)->getType()->isStructTy() ||
+ DepSI->getOperand(0)->getType()->isArrayTy())
+ return -1;
+
+ Value *StorePtr = DepSI->getPointerOperand();
+ uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
+ StorePtr, StoreSize, TD);
+}
+
+static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
+ MemIntrinsic *MI,
+ const TargetData &TD) {
+ // If the mem operation is a non-constant size, we can't handle it.
+ ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
+ if (SizeCst == 0) return -1;
+ uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
+
+ // If this is memset, we just need to see if the offset is valid in the size
+ // of the memset..
+ if (MI->getIntrinsicID() == Intrinsic::memset)
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+ MemSizeInBits, TD);
+
+ // If we have a memcpy/memmove, the only case we can handle is if this is a
+ // copy from constant memory. In that case, we can read directly from the
+ // constant memory.
+ MemTransferInst *MTI = cast<MemTransferInst>(MI);
+
+ Constant *Src = dyn_cast<Constant>(MTI->getSource());
+ if (Src == 0) return -1;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
+ if (GV == 0 || !GV->isConstant()) return -1;
+
+ // See if the access is within the bounds of the transfer.
+ int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
+ MI->getDest(), MemSizeInBits, TD);
+ if (Offset == -1)
+ return Offset;
+
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ Src = ConstantExpr::getBitCast(Src,
+ llvm::Type::getInt8PtrTy(Src->getContext()));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+ Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ if (ConstantFoldLoadFromConstPtr(Src, &TD))
+ return Offset;
+ return -1;
+}
+
+
+/// GetStoreValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store. This means
+/// that the store *may* provide bits used by the load but we can't be sure
+/// because the pointers don't mustalias. Check this case to see if there is
+/// anything more we can do before we give up.
+static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
+ const Type *LoadTy,
+ Instruction *InsertPt, const TargetData &TD){
+ LLVMContext &Ctx = SrcVal->getType()->getContext();
+
+ uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+ uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
+
+ IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
+
+ // Compute which bits of the stored value are being used by the load. Convert
+ // to an integer type to start with.
+ if (SrcVal->getType()->isPointerTy())
+ SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
+ if (!SrcVal->getType()->isIntegerTy())
+ SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
+ "tmp");
+
+ // Shift the bits to the least significant depending on endianness.
+ unsigned ShiftAmt;
+ if (TD.isLittleEndian())
+ ShiftAmt = Offset*8;
+ else
+ ShiftAmt = (StoreSize-LoadSize-Offset)*8;
+
+ if (ShiftAmt)
+ SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");
+
+ if (LoadSize != StoreSize)
+ SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
+ "tmp");
+
+ return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
+}
+
+/// GetMemInstValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering mem intrinsic.
+static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+ const Type *LoadTy, Instruction *InsertPt,
+ const TargetData &TD){
+ LLVMContext &Ctx = LoadTy->getContext();
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+
+ IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
+
+ // We know that this method is only called when the mem transfer fully
+ // provides the bits for the load.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
+ // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
+ // independently of what the offset is.
+ Value *Val = MSI->getValue();
+ if (LoadSize != 1)
+ Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
+
+ Value *OneElt = Val;
+
+ // Splat the value out to the right number of bits.
+ for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
+ // If we can double the number of bytes set, do it.
+ if (NumBytesSet*2 <= LoadSize) {
+ Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
+ Val = Builder.CreateOr(Val, ShVal);
+ NumBytesSet <<= 1;
+ continue;
+ }
+
+ // Otherwise insert one byte at a time.
+ Value *ShVal = Builder.CreateShl(Val, 1*8);
+ Val = Builder.CreateOr(OneElt, ShVal);
+ ++NumBytesSet;
+ }
+
+ return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
+ }
+
+ // Otherwise, this is a memcpy/memmove from a constant global.
+ MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+ Constant *Src = cast<Constant>(MTI->getSource());
+
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ Src = ConstantExpr::getBitCast(Src,
+ llvm::Type::getInt8PtrTy(Src->getContext()));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+ Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ return ConstantFoldLoadFromConstPtr(Src, &TD);
+}
+
+namespace {
+
+struct AvailableValueInBlock {
+ /// BB - The basic block in question.
+ BasicBlock *BB;
+ enum ValType {
+ SimpleVal, // A simple offsetted value that is accessed.
+ MemIntrin // A memory intrinsic which is loaded from.
+ };
+
+ /// V - The value that is live out of the block.
+ PointerIntPair<Value *, 1, ValType> Val;
+
+ /// Offset - The byte offset in Val that is interesting for the load query.
+ unsigned Offset;
+
+ static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(V);
+ Res.Val.setInt(SimpleVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(MI);
+ Res.Val.setInt(MemIntrin);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ Value *getSimpleValue() const {
+ assert(isSimpleValue() && "Wrong accessor");
+ return Val.getPointer();
+ }
+
+ MemIntrinsic *getMemIntrinValue() const {
+ assert(!isSimpleValue() && "Wrong accessor");
+ return cast<MemIntrinsic>(Val.getPointer());
+ }
+
+ /// MaterializeAdjustedValue - Emit code into this block to adjust the value
+ /// defined here to the specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(const Type *LoadTy,
+ const TargetData *TD) const {
+ Value *Res;
+ if (isSimpleValue()) {
+ Res = getSimpleValue();
+ if (Res->getType() != LoadTy) {
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
+ *TD);
+
+ DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ << *getSimpleValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else {
+ Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
+ LoadTy, BB->getTerminator(), *TD);
+ DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ << " " << *getMemIntrinValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ return Res;
+ }
+};
+
+}
+
+/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
+/// construct SSA form, allowing us to eliminate LI. This returns the value
+/// that should be used at LI's definition site.
+static Value *ConstructSSAForLoadSet(LoadInst *LI,
+ SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
+ const TargetData *TD,
+ const DominatorTree &DT,
+ AliasAnalysis *AA) {
+ // Check for the fully redundant, dominating load case. In this case, we can
+ // just use the dominating value directly.
+ if (ValuesPerBlock.size() == 1 &&
+ DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
+ return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);
+
+ // Otherwise, we have to construct SSA form.
+ SmallVector<PHINode*, 8> NewPHIs;
+ SSAUpdater SSAUpdate(&NewPHIs);
+ SSAUpdate.Initialize(LI->getType(), LI->getName());
+
+ const Type *LoadTy = LI->getType();
+
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+ const AvailableValueInBlock &AV = ValuesPerBlock[i];
+ BasicBlock *BB = AV.BB;
+
+ if (SSAUpdate.HasValueForBlock(BB))
+ continue;
+
+ SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
+ }
+
+ // Perform PHI construction.
+ Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
+
+ // If new PHI nodes were created, notify alias analysis.
+ if (V->getType()->isPointerTy())
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ AA->copyValue(LI, NewPHIs[i]);
+
+ return V;
+}
+
+static bool isLifetimeStart(const Instruction *Inst) {
+ if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
+ return II->getIntrinsicID() == Intrinsic::lifetime_start;
+ return false;
+}
+
+/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// non-local by performing PHI construction.
+bool GVN::processNonLocalLoad(LoadInst *LI,
+ SmallVectorImpl<Instruction*> &toErase) {
+ // Find the non-local dependencies of the load.
+ SmallVector<NonLocalDepResult, 64> Deps;
+ MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
+ Deps);
+ //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
+ // << Deps.size() << *LI << '\n');
+
+ // If we had to process more than one hundred blocks to find the
+ // dependencies, this load isn't worth worrying about. Optimizing
+ // it will be too expensive.
+ if (Deps.size() > 100)
+ return false;
+
+ // If we had a phi translation failure, we'll have a single entry which is a
+ // clobber in the current block. Reject this early.
+ if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
+ DEBUG(
+ dbgs() << "GVN: non-local load ";
+ WriteAsOperand(dbgs(), LI);
+ dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
+ );
+ return false;
+ }
+
+ // Filter out useless results (non-locals, etc). Keep track of the blocks
+ // where we have a value available in repl, also keep track of whether we see
+ // dependencies that produce an unknown value for the load (such as a call
+ // that could potentially clobber the load).
+ SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
+ SmallVector<BasicBlock*, 16> UnavailableBlocks;
+
+ const TargetData *TD = 0;
+
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
+ BasicBlock *DepBB = Deps[i].getBB();
+ MemDepResult DepInfo = Deps[i].getResult();
+
+ if (DepInfo.isClobber()) {
+ // The address being loaded in this non-local block may not be the same as
+ // the pointer operand of the load if PHI translation occurs. Make sure
+ // to consider the right address.
+ Value *Address = Deps[i].getAddress();
+
+ // If the dependence is to a store that writes to a superset of the bits
+ // read by the load, we can extract the bits we need for the load from the
+ // stored value.
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
+ if (TD == 0)
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (TD && Address) {
+ int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
+ DepSI, *TD);
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ DepSI->getOperand(0),
+ Offset));
+ continue;
+ }
+ }
+ }
+
+ // If the clobbering value is a memset/memcpy/memmove, see if we can
+ // forward a value on from it.
+ if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
+ if (TD == 0)
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (TD && Address) {
+ int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
+ DepMI, *TD);
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
+ Offset));
+ continue;
+ }
+ }
+ }
+
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
+ Instruction *DepInst = DepInfo.getInst();
+
+ // Loading the allocation -> undef.
+ if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
+ // Loading immediately after lifetime begin -> undef.
+ isLifetimeStart(DepInst)) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ UndefValue::get(LI->getType())));
+ continue;
+ }
+
+ if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
+ // Reject loads and stores that are to the same address but are of
+ // different types if we have to.
+ if (S->getOperand(0)->getType() != LI->getType()) {
+ if (TD == 0)
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ // If the stored value is larger or equal to the loaded value, we can
+ // reuse it.
+ if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0),
+ LI->getType(), *TD)) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+ }
+
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+ S->getOperand(0)));
+ continue;
+ }
+
+ if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
+ // If the types mismatch and we can't handle it, reject reuse of the load.
+ if (LD->getType() != LI->getType()) {
+ if (TD == 0)
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ // If the stored value is larger or equal to the loaded value, we can
+ // reuse it.
+ if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+ }
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
+ continue;
+ }
+
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
+ // If we have no predecessors that produce a known value for this load, exit
+ // early.
+ if (ValuesPerBlock.empty()) return false;
+
+ // If all of the instructions we depend on produce a known value for this
+ // load, then it is fully redundant and we can use PHI insertion to compute
+ // its value. Insert PHIs and remove the fully redundant value now.
+ if (UnavailableBlocks.empty()) {
+ DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
+
+ // Perform PHI construction.
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
+ VN.getAliasAnalysis());
+ LI->replaceAllUsesWith(V);
+
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (V->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ VN.erase(LI);
+ toErase.push_back(LI);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ if (!EnablePRE || !EnableLoadPRE)
+ return false;
+
+ // Okay, we have *some* definitions of the value. This means that the value
+ // is available in some of our (transitive) predecessors. Lets think about
+ // doing PRE of this load. This will involve inserting a new load into the
+ // predecessor when it's not available. We could do this in general, but
+ // prefer to not increase code size. As such, we only do this when we know
+ // that we only have to insert *one* load (which means we're basically moving
+ // the load, not inserting a new one).
+
+ SmallPtrSet<BasicBlock *, 4> Blockers;
+ for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
+ Blockers.insert(UnavailableBlocks[i]);
+
+ // Lets find first basic block with more than one predecessor. Walk backwards
+ // through predecessors if needed.
+ BasicBlock *LoadBB = LI->getParent();
+ BasicBlock *TmpBB = LoadBB;
+
+ bool isSinglePred = false;
+ bool allSingleSucc = true;
+ while (TmpBB->getSinglePredecessor()) {
+ isSinglePred = true;
+ TmpBB = TmpBB->getSinglePredecessor();
+ if (TmpBB == LoadBB) // Infinite (unreachable) loop.
+ return false;
+ if (Blockers.count(TmpBB))
+ return false;
+ if (TmpBB->getTerminator()->getNumSuccessors() != 1)
+ allSingleSucc = false;
+ }
+
+ assert(TmpBB);
+ LoadBB = TmpBB;
+
+ // If we have a repl set with LI itself in it, this means we have a loop where
+ // at least one of the values is LI. Since this means that we won't be able
+ // to eliminate LI even if we insert uses in the other predecessors, we will
+ // end up increasing code size. Reject this by scanning for LI.
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+ if (ValuesPerBlock[i].isSimpleValue() &&
+ ValuesPerBlock[i].getSimpleValue() == LI) {
+ // Skip cases where LI is the only definition, even for EnableFullLoadPRE.
+ if (!EnableFullLoadPRE || e == 1)
+ return false;
+ }
+ }
+
+ // FIXME: It is extremely unclear what this loop is doing, other than
+ // artificially restricting loadpre.
+ if (isSinglePred) {
+ bool isHot = false;
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+ const AvailableValueInBlock &AV = ValuesPerBlock[i];
+ if (AV.isSimpleValue())
+ // "Hot" Instruction is in some loop (because it dominates its dep.
+ // instruction).
+ if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
+ if (DT->dominates(LI, I)) {
+ isHot = true;
+ break;
+ }
+ }
+
+ // We are interested only in "hot" instructions. We don't want to do any
+ // mis-optimizations here.
+ if (!isHot)
+ return false;
+ }
+
+ // Check to see how many predecessors have the loaded value fully
+ // available.
+ DenseMap<BasicBlock*, Value*> PredLoads;
+ DenseMap<BasicBlock*, char> FullyAvailableBlocks;
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
+ FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
+ for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
+ FullyAvailableBlocks[UnavailableBlocks[i]] = false;
+
+ SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
+ for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
+ PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
+ continue;
+ }
+ PredLoads[Pred] = 0;
+
+ if (Pred->getTerminator()->getNumSuccessors() != 1) {
+ if (isa<IndirectBrInst>(Pred->getTerminator())) {
+ DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+ unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
+ NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
+ }
+ }
+ if (!NeedToSplit.empty()) {
+ toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
+ return false;
+ }
+
+ // Decide whether PRE is profitable for this load.
+ unsigned NumUnavailablePreds = PredLoads.size();
+ assert(NumUnavailablePreds != 0 &&
+ "Fully available value should be eliminated above!");
+ if (!EnableFullLoadPRE) {
+ // If this load is unavailable in multiple predecessors, reject it.
+ // FIXME: If we could restructure the CFG, we could make a common pred with
+ // all the preds that don't have an available LI and insert a new load into
+ // that one block.
+ if (NumUnavailablePreds != 1)
+ return false;
+ }
+
+ // Check if the load can safely be moved to all the unavailable predecessors.
+ bool CanDoPRE = true;
+ SmallVector<Instruction*, 8> NewInsts;
+ for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
+ E = PredLoads.end(); I != E; ++I) {
+ BasicBlock *UnavailablePred = I->first;
+
+ // Do PHI translation to get its value in the predecessor if necessary. The
+ // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
+
+ // If all preds have a single successor, then we know it is safe to insert
+ // the load on the pred (?!?), so we can insert code to materialize the
+ // pointer if it is not available.
+ PHITransAddr Address(LI->getOperand(0), TD);
+ Value *LoadPtr = 0;
+ if (allSingleSucc) {
+ LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+ *DT, NewInsts);
+ } else {
+ Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
+ LoadPtr = Address.getAddr();
+ }
+
+ // If we couldn't find or insert a computation of this phi translated value,
+ // we fail PRE.
+ if (LoadPtr == 0) {
+ DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
+ << *LI->getOperand(0) << "\n");
+ CanDoPRE = false;
+ break;
+ }
+
+ // Make sure it is valid to move this load here. We have to watch out for:
+ // @1 = getelementptr (i8* p, ...
+ // test p and branch if == 0
+ // load @1
+ // It is valid to have the getelementptr before the test, even if p can be 0,
+ // as getelementptr only does address arithmetic.
+ // If we are not pushing the value through any multiple-successor blocks
+ // we do not have this case. Otherwise, check that the load is safe to
+ // put anywhere; this can be improved, but should be conservatively safe.
+ if (!allSingleSucc &&
+ // FIXME: REEVALUTE THIS.
+ !isSafeToLoadUnconditionally(LoadPtr,
+ UnavailablePred->getTerminator(),
+ LI->getAlignment(), TD)) {
+ CanDoPRE = false;
+ break;
+ }
+
+ I->second = LoadPtr;
+ }
+
+ if (!CanDoPRE) {
+ while (!NewInsts.empty())
+ NewInsts.pop_back_val()->eraseFromParent();
+ return false;
+ }
+
+ // Okay, we can eliminate this load by inserting a reload in the predecessor
+ // and using PHI construction to get the value in the other predecessors, do
+ // it.
+ DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
+ DEBUG(if (!NewInsts.empty())
+ dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
+ << *NewInsts.back() << '\n');
+
+ // Assign value numbers to the new instructions.
+ for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
+ // FIXME: We really _ought_ to insert these value numbers into their
+ // parent's availability map. However, in doing so, we risk getting into
+ // ordering issues. If a block hasn't been processed yet, we would be
+ // marking a value as AVAIL-IN, which isn't what we intend.
+ VN.lookup_or_add(NewInsts[i]);
+ }
+
+ for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
+ E = PredLoads.end(); I != E; ++I) {
+ BasicBlock *UnavailablePred = I->first;
+ Value *LoadPtr = I->second;
+
+ Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
+ LI->getAlignment(),
+ UnavailablePred->getTerminator());
+
+ // Add the newly created load.
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
+ NewLoad));
+ MD->invalidateCachedPointerInfo(LoadPtr);
+ DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
+ }
+
+ // Perform PHI construction.
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
+ VN.getAliasAnalysis());
+ LI->replaceAllUsesWith(V);
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (V->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ VN.erase(LI);
+ toErase.push_back(LI);
+ ++NumPRELoad;
+ return true;
+}
+
+/// processLoad - Attempt to eliminate a load, first by eliminating it
+/// locally, and then attempting non-local elimination if that fails.
+bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
+ if (!MD)
+ return false;
+
+ if (L->isVolatile())
+ return false;
+
+ // ... to a pointer that has been loaded from before...
+ MemDepResult Dep = MD->getDependency(L);
+
+ // If the value isn't available, don't do anything!
+ if (Dep.isClobber()) {
+ // Check to see if we have something like this:
+ // store i32 123, i32* %P
+ // %A = bitcast i32* %P to i8*
+ // %B = gep i8* %A, i32 1
+ // %C = load i8* %B
+ //
+ // We could do that by recognizing if the clobber instructions are obviously
+ // a common base + constant offset, and if the previous store (or memset)
+ // completely covers this load. This sort of thing can happen in bitfield
+ // access code.
+ Value *AvailVal = 0;
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
+ if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+ int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
+ L->getPointerOperand(),
+ DepSI, *TD);
+ if (Offset != -1)
+ AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
+ L->getType(), L, *TD);
+ }
+
+ // If the clobbering value is a memset/memcpy/memmove, see if we can forward
+ // a value on from it.
+ if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
+ if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+ int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
+ L->getPointerOperand(),
+ DepMI, *TD);
+ if (Offset != -1)
+ AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
+ }
+ }
+
+ if (AvailVal) {
+ DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
+ << *AvailVal << '\n' << *L << "\n\n\n");
+
+ // Replace the load!
+ L->replaceAllUsesWith(AvailVal);
+ if (AvailVal->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(AvailVal);
+ VN.erase(L);
+ toErase.push_back(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ DEBUG(
+ // fast print dep, using operator<< on instruction would be too slow
+ dbgs() << "GVN: load ";
+ WriteAsOperand(dbgs(), L);
+ Instruction *I = Dep.getInst();
+ dbgs() << " is clobbered by " << *I << '\n';
+ );
+ return false;
+ }
+
+ // If it is defined in another block, try harder.
+ if (Dep.isNonLocal())
+ return processNonLocalLoad(L, toErase);
+
+ Instruction *DepInst = Dep.getInst();
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
+ Value *StoredVal = DepSI->getOperand(0);
+
+ // The store and load are to a must-aliased pointer, but they may not
+ // actually have the same type. See if we know how to reuse the stored
+ // value (depending on its type).
+ const TargetData *TD = 0;
+ if (StoredVal->getType() != L->getType()) {
+ if ((TD = getAnalysisIfAvailable<TargetData>())) {
+ StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
+ L, *TD);
+ if (StoredVal == 0)
+ return false;
+
+ DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
+ << '\n' << *L << "\n\n\n");
+ }
+ else
+ return false;
+ }
+
+ // Remove it!
+ L->replaceAllUsesWith(StoredVal);
+ if (StoredVal->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(StoredVal);
+ VN.erase(L);
+ toErase.push_back(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
+ Value *AvailableVal = DepLI;
+
+ // The loads are of a must-aliased pointer, but they may not actually have
+ // the same type. See if we know how to reuse the previously loaded value
+ // (depending on its type).
+ const TargetData *TD = 0;
+ if (DepLI->getType() != L->getType()) {
+ if ((TD = getAnalysisIfAvailable<TargetData>())) {
+ AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
+ if (AvailableVal == 0)
+ return false;
+
+ DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
+ << "\n" << *L << "\n\n\n");
+ }
+ else
+ return false;
+ }
+
+ // Remove it!
+ L->replaceAllUsesWith(AvailableVal);
+ if (DepLI->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(DepLI);
+ VN.erase(L);
+ toErase.push_back(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ // If this load really doesn't depend on anything, then we must be loading an
+ // undef value. This can happen when loading for a fresh allocation with no
+ // intervening stores, for example.
+ if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
+ L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ VN.erase(L);
+ toErase.push_back(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ // If this load occurs either right after a lifetime begin,
+ // then the loaded value is undefined.
+ if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ VN.erase(L);
+ toErase.push_back(L);
+ ++NumGVNLoad;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
+ DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
+ if (I == localAvail.end())
+ return 0;
+
+ ValueNumberScope *Locals = I->second;
+ while (Locals) {
+ DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
+ if (I != Locals->table.end())
+ return I->second;
+ Locals = Locals->parent;
+ }
+
+ return 0;
+}
+
+
+/// processInstruction - When calculating availability, handle an instruction
+/// by inserting it into the appropriate sets
+bool GVN::processInstruction(Instruction *I,
+ SmallVectorImpl<Instruction*> &toErase) {
+ // Ignore dbg info intrinsics.
+ if (isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ bool Changed = processLoad(LI, toErase);
+
+ if (!Changed) {
+ unsigned Num = VN.lookup_or_add(LI);
+ localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
+ }
+
+ return Changed;
+ }
+
+ uint32_t NextNum = VN.getNextUnusedValueNumber();
+ unsigned Num = VN.lookup_or_add(I);
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+
+ if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+ return false;
+
+ Value *BranchCond = BI->getCondition();
+ uint32_t CondVN = VN.lookup_or_add(BranchCond);
+
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+
+ if (TrueSucc->getSinglePredecessor())
+ localAvail[TrueSucc]->table[CondVN] =
+ ConstantInt::getTrue(TrueSucc->getContext());
+ if (FalseSucc->getSinglePredecessor())
+ localAvail[FalseSucc]->table[CondVN] =
+ ConstantInt::getFalse(TrueSucc->getContext());
+
+ return false;
+
+ // Allocations are always uniquely numbered, so we can save time and memory
+ // by fast failing them.
+ } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
+ localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+ return false;
+ }
+
+ // Collapse PHI nodes
+ if (PHINode* p = dyn_cast<PHINode>(I)) {
+ Value *constVal = CollapsePhi(p);
+
+ if (constVal) {
+ p->replaceAllUsesWith(constVal);
+ if (MD && constVal->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(constVal);
+ VN.erase(p);
+
+ toErase.push_back(p);
+ } else {
+ localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+ }
+
+ // If the number we were assigned was a brand new VN, then we don't
+ // need to do a lookup to see if the number already exists
+ // somewhere in the domtree: it can't!
+ } else if (Num == NextNum) {
+ localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+
+ // Perform fast-path value-number based elimination of values inherited from
+ // dominators.
+ } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
+ // Remove it!
+ VN.erase(I);
+ I->replaceAllUsesWith(repl);
+ if (MD && repl->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(repl);
+ toErase.push_back(I);
+ return true;
+
+ } else {
+ localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+ }
+
+ return false;
+}
+
+/// runOnFunction - This is the main transformation entry point for a function.
+bool GVN::runOnFunction(Function& F) {
+ if (!NoLoads)
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
+ VN.setMemDep(MD);
+ VN.setDomTree(DT);
+
+ bool Changed = false;
+ bool ShouldContinue = true;
+
+ // Merge unconditional branches, allowing PRE to catch more
+ // optimization opportunities.
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
+ BasicBlock *BB = FI;
+ ++FI;
+ bool removedBlock = MergeBlockIntoPredecessor(BB, this);
+ if (removedBlock) ++NumGVNBlocks;
+
+ Changed |= removedBlock;
+ }
+
+ unsigned Iteration = 0;
+
+ while (ShouldContinue) {
+ DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
+ ShouldContinue = iterateOnFunction(F);
+ if (splitCriticalEdges())
+ ShouldContinue = true;
+ Changed |= ShouldContinue;
+ ++Iteration;
+ }
+
+ if (EnablePRE) {
+ bool PREChanged = true;
+ while (PREChanged) {
+ PREChanged = performPRE(F);
+ Changed |= PREChanged;
+ }
+ }
+ // FIXME: Should perform GVN again after PRE does something. PRE can move
+ // computations into blocks where they become fully redundant. Note that
+ // we can't do this until PRE's critical edge splitting updates memdep.
+ // Actually, when this happens, we should just fully integrate PRE into GVN.
+
+ cleanupGlobalSets();
+
+ return Changed;
+}
+
+
+bool GVN::processBlock(BasicBlock *BB) {
+ // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
+ // incrementing BI before processing an instruction).
+ SmallVector<Instruction*, 8> toErase;
+ bool ChangedFunction = false;
+
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE;) {
+ ChangedFunction |= processInstruction(BI, toErase);
+ if (toErase.empty()) {
+ ++BI;
+ continue;
+ }
+
+ // If we need some instructions deleted, do it now.
+ NumGVNInstr += toErase.size();
+
+ // Avoid iterator invalidation.
+ bool AtStart = BI == BB->begin();
+ if (!AtStart)
+ --BI;
+
+ for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
+ E = toErase.end(); I != E; ++I) {
+ DEBUG(dbgs() << "GVN removed: " << **I << '\n');
+ if (MD) MD->removeInstruction(*I);
+ (*I)->eraseFromParent();
+ DEBUG(verifyRemoved(*I));
+ }
+ toErase.clear();
+
+ if (AtStart)
+ BI = BB->begin();
+ else
+ ++BI;
+ }
+
+ return ChangedFunction;
+}
+
+/// performPRE - Perform a purely local form of PRE that looks for diamond
+/// control flow patterns and attempts to perform simple PRE at the join point.
+bool GVN::performPRE(Function &F) {
+ bool Changed = false;
+ DenseMap<BasicBlock*, Value*> predMap;
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
+ BasicBlock *CurrentBlock = *DI;
+
+ // Nothing to PRE in the entry block.
+ if (CurrentBlock == &F.getEntryBlock()) continue;
+
+ for (BasicBlock::iterator BI = CurrentBlock->begin(),
+ BE = CurrentBlock->end(); BI != BE; ) {
+ Instruction *CurInst = BI++;
+
+ if (isa<AllocaInst>(CurInst) ||
+ isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
+ CurInst->getType()->isVoidTy() ||
+ CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
+ isa<DbgInfoIntrinsic>(CurInst))
+ continue;
+
+ // We don't currently value number ANY inline asm calls.
+ if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
+ if (CallI->isInlineAsm())
+ continue;
+
+ uint32_t ValNo = VN.lookup(CurInst);
+
+ // Look for the predecessors for PRE opportunities. We're
+ // only trying to solve the basic diamond case, where
+ // a value is computed in the successor and one predecessor,
+ // but not the other. We also explicitly disallow cases
+ // where the successor is its own predecessor, because they're
+ // more complicated to get right.
+ unsigned NumWith = 0;
+ unsigned NumWithout = 0;
+ BasicBlock *PREPred = 0;
+ predMap.clear();
+
+ for (pred_iterator PI = pred_begin(CurrentBlock),
+ PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ // We're not interested in PRE where the block is its
+ // own predecessor, or in blocks with predecessors
+ // that are not reachable.
+ if (P == CurrentBlock) {
+ NumWithout = 2;
+ break;
+ } else if (!localAvail.count(P)) {
+ NumWithout = 2;
+ break;
+ }
+
+ DenseMap<uint32_t, Value*>::iterator predV =
+ localAvail[P]->table.find(ValNo);
+ if (predV == localAvail[P]->table.end()) {
+ PREPred = P;
+ ++NumWithout;
+ } else if (predV->second == CurInst) {
+ NumWithout = 2;
+ } else {
+ predMap[P] = predV->second;
+ ++NumWith;
+ }
+ }
+
+ // Don't do PRE when it might increase code size, i.e. when
+ // we would need to insert instructions in more than one pred.
+ if (NumWithout != 1 || NumWith == 0)
+ continue;
+
+ // Don't do PRE across indirect branch.
+ if (isa<IndirectBrInst>(PREPred->getTerminator()))
+ continue;
+
+ // We can't do PRE safely on a critical edge, so instead we schedule
+ // the edge to be split and perform the PRE the next time we iterate
+ // on the function.
+ unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
+ if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
+ toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
+ continue;
+ }
+
+ // Instantiate the expression in the predecessor that lacked it.
+ // Because we are going top-down through the block, all value numbers
+ // will be available in the predecessor by the time we need them. Any
+ // that weren't originally present will have been instantiated earlier
+ // in this loop.
+ Instruction *PREInstr = CurInst->clone();
+ bool success = true;
+ for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
+ Value *Op = PREInstr->getOperand(i);
+ if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
+ continue;
+
+ if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
+ PREInstr->setOperand(i, V);
+ } else {
+ success = false;
+ break;
+ }
+ }
+
+ // Fail out if we encounter an operand that is not available in
+ // the PRE predecessor. This is typically because of loads which
+ // are not value numbered precisely.
+ if (!success) {
+ delete PREInstr;
+ DEBUG(verifyRemoved(PREInstr));
+ continue;
+ }
+
+ PREInstr->insertBefore(PREPred->getTerminator());
+ PREInstr->setName(CurInst->getName() + ".pre");
+ predMap[PREPred] = PREInstr;
+ VN.add(PREInstr, ValNo);
+ ++NumGVNPRE;
+
+ // Update the availability map to include the new instruction.
+ localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
+
+ // Create a PHI to make the value available in this block.
+ PHINode* Phi = PHINode::Create(CurInst->getType(),
+ CurInst->getName() + ".pre-phi",
+ CurrentBlock->begin());
+ for (pred_iterator PI = pred_begin(CurrentBlock),
+ PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ Phi->addIncoming(predMap[P], P);
+ }
+
+ VN.add(Phi, ValNo);
+ localAvail[CurrentBlock]->table[ValNo] = Phi;
+
+ CurInst->replaceAllUsesWith(Phi);
+ if (MD && Phi->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(Phi);
+ VN.erase(CurInst);
+
+ DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
+ if (MD) MD->removeInstruction(CurInst);
+ CurInst->eraseFromParent();
+ DEBUG(verifyRemoved(CurInst));
+ Changed = true;
+ }
+ }
+
+ if (splitCriticalEdges())
+ Changed = true;
+
+ return Changed;
+}
+
+/// splitCriticalEdges - Split critical edges found during the previous
+/// iteration that may enable further optimization.
+bool GVN::splitCriticalEdges() {
+ if (toSplit.empty())
+ return false;
+ do {
+ std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
+ SplitCriticalEdge(Edge.first, Edge.second, this);
+ } while (!toSplit.empty());
+ if (MD) MD->invalidateCachedPredecessors();
+ return true;
+}
+
+/// iterateOnFunction - Executes one iteration of GVN
+bool GVN::iterateOnFunction(Function &F) {
+ cleanupGlobalSets();
+
+ for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
+ DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
+ if (DI->getIDom())
+ localAvail[DI->getBlock()] =
+ new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
+ else
+ localAvail[DI->getBlock()] = new ValueNumberScope(0);
+ }
+
+ // Top-down walk of the dominator tree
+ bool Changed = false;
+#if 0
+ // Needed for value numbering with phi construction to work.
+ ReversePostOrderTraversal<Function*> RPOT(&F);
+ for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
+ RE = RPOT.end(); RI != RE; ++RI)
+ Changed |= processBlock(*RI);
+#else
+ for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
+ DE = df_end(DT->getRootNode()); DI != DE; ++DI)
+ Changed |= processBlock(DI->getBlock());
+#endif
+
+ return Changed;
+}
+
+void GVN::cleanupGlobalSets() {
+ VN.clear();
+
+ for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
+ I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
+ delete I->second;
+ localAvail.clear();
+}
+
+/// verifyRemoved - Verify that the specified instruction does not occur in our
+/// internal data structures.
+void GVN::verifyRemoved(const Instruction *Inst) const {
+ VN.verifyRemoved(Inst);
+
+ // Walk through the value number scope to make sure the instruction isn't
+ // ferreted away in it.
+ for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
+ I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
+ const ValueNumberScope *VNS = I->second;
+
+ while (VNS) {
+ for (DenseMap<uint32_t, Value*>::const_iterator
+ II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
+ assert(II->second != Inst && "Inst still in value numbering scope!");
+ }
+
+ VNS = VNS->parent;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
new file mode 100644
index 0000000..af2eafc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -0,0 +1,1034 @@
+//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation analyzes and transforms the induction variables (and
+// computations derived from them) into simpler forms suitable for subsequent
+// analysis and transformation.
+//
+// This transformation makes the following changes to each loop with an
+// identifiable induction variable:
+// 1. All loops are transformed to have a SINGLE canonical induction variable
+// which starts at zero and steps by one.
+// 2. The canonical induction variable is guaranteed to be the first PHI node
+// in the loop header block.
+// 3. The canonical induction variable is guaranteed to be in a wide enough
+// type so that IV expressions need not be (directly) zero-extended or
+// sign-extended.
+// 4. Any pointer arithmetic recurrences are raised to use array subscripts.
+//
+// If the trip count of a loop is computable, this pass also makes the following
+// changes:
+// 1. The exit condition for the loop is canonicalized to compare the
+// induction value against the exit value. This turns loops like:
+// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
+// 2. Any use outside of the loop of an expression derived from the indvar
+// is changed to compute the derived value outside of the loop, eliminating
+// the dependence on the exit value of the induction variable. If the only
+// purpose of the loop is to compute the exit value of some derived
+// expression, this transformation will make the loop dead.
+//
+// This transformation should be followed by strength reduction after all of the
+// desired loop transformations have been performed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "indvars"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumRemoved , "Number of aux indvars removed");
+STATISTIC(NumInserted, "Number of canonical indvars added");
+STATISTIC(NumReplaced, "Number of exit values replaced");
+STATISTIC(NumLFTR , "Number of loop exit tests replaced");
+
+namespace {
+ class IndVarSimplify : public LoopPass {
+ IVUsers *IU;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+ bool Changed;
+ public:
+
+ static char ID; // Pass identification, replacement for typeid
+ IndVarSimplify() : LoopPass(ID) {}
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addRequired<IVUsers>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<IVUsers>();
+ AU.setPreservesCFG();
+ }
+
+ private:
+
+ void EliminateIVComparisons();
+ void EliminateIVRemainders();
+ void RewriteNonIntegerIVs(Loop *L);
+
+ ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ PHINode *IndVar,
+ BasicBlock *ExitingBlock,
+ BranchInst *BI,
+ SCEVExpander &Rewriter);
+ void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+
+ void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
+
+ void SinkUnusedInvariants(Loop *L);
+
+ void HandleFloatingPointIV(Loop *L, PHINode *PH);
+ };
+}
+
+char IndVarSimplify::ID = 0;
+INITIALIZE_PASS(IndVarSimplify, "indvars",
+ "Canonicalize Induction Variables", false, false);
+
+Pass *llvm::createIndVarSimplifyPass() {
+ return new IndVarSimplify();
+}
+
+/// LinearFunctionTestReplace - This method rewrites the exit condition of the
+/// loop to be a canonical != comparison against the incremented loop induction
+/// variable. This pass is able to rewrite the exit tests of any loop where the
+/// SCEV analysis can determine a loop-invariant trip count of the loop, which
+/// is actually a much broader range than just linear tests.
+ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
+ const SCEV *BackedgeTakenCount,
+ PHINode *IndVar,
+ BasicBlock *ExitingBlock,
+ BranchInst *BI,
+ SCEVExpander &Rewriter) {
+ // Special case: If the backedge-taken count is a UDiv, it's very likely a
+ // UDiv that ScalarEvolution produced in order to compute a precise
+ // expression, rather than a UDiv from the user's code. If we can't find a
+ // UDiv in the code with some simple searching, assume the former and forego
+ // rewriting the loop.
+ if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
+ ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!OrigCond) return 0;
+ const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
+ R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
+ if (R != BackedgeTakenCount) {
+ const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
+ L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
+ if (L != BackedgeTakenCount)
+ return 0;
+ }
+ }
+
+ // If the exiting block is not the same as the backedge block, we must compare
+ // against the preincremented value, otherwise we prefer to compare against
+ // the post-incremented value.
+ Value *CmpIndVar;
+ const SCEV *RHS = BackedgeTakenCount;
+ if (ExitingBlock == L->getLoopLatch()) {
+ // Add one to the "backedge-taken" count to get the trip count.
+ // If this addition may overflow, we have to be more pessimistic and
+ // cast the induction variable before doing the add.
+ const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
+ const SCEV *N =
+ SE->getAddExpr(BackedgeTakenCount,
+ SE->getConstant(BackedgeTakenCount->getType(), 1));
+ if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+ SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+ // No overflow. Cast the sum.
+ RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
+ } else {
+ // Potential overflow. Cast before doing the add.
+ RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+ IndVar->getType());
+ RHS = SE->getAddExpr(RHS,
+ SE->getConstant(IndVar->getType(), 1));
+ }
+
+ // The BackedgeTaken expression contains the number of times that the
+ // backedge branches to the loop header. This is one less than the
+ // number of times the loop executes, so use the incremented indvar.
+ CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBlock);
+ } else {
+ // We have to use the preincremented value...
+ RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+ IndVar->getType());
+ CmpIndVar = IndVar;
+ }
+
+ // Expand the code for the iteration count.
+ assert(RHS->isLoopInvariant(L) &&
+ "Computed iteration count is not loop invariant!");
+ Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+
+ // Insert a new icmp_ne or icmp_eq instruction before the branch.
+ ICmpInst::Predicate Opcode;
+ if (L->contains(BI->getSuccessor(0)))
+ Opcode = ICmpInst::ICMP_NE;
+ else
+ Opcode = ICmpInst::ICMP_EQ;
+
+ DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
+ << " LHS:" << *CmpIndVar << '\n'
+ << " op:\t"
+ << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+ << " RHS:\t" << *RHS << "\n");
+
+ ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
+
+ Value *OrigCond = BI->getCondition();
+ // It's tempting to use replaceAllUsesWith here to fully replace the old
+ // comparison, but that's not immediately safe, since users of the old
+ // comparison may not be dominated by the new comparison. Instead, just
+ // update the branch to use the new comparison; in the common case this
+ // will make old comparison dead.
+ BI->setCondition(Cond);
+ RecursivelyDeleteTriviallyDeadInstructions(OrigCond);
+
+ ++NumLFTR;
+ Changed = true;
+ return Cond;
+}
+
+/// RewriteLoopExitValues - Check to see if this loop has a computable
+/// loop-invariant execution count. If so, this means that we can compute the
+/// final value of any expressions that are recurrent in the loop, and
+/// substitute the exit values from the loop into any instructions outside of
+/// the loop that use the final values of the current expressions.
+///
+/// This is mostly redundant with the regular IndVarSimplify activities that
+/// happen later, except that it's more powerful in some cases, because it's
+/// able to brute-force evaluate arbitrary instructions as long as they have
+/// constant operands at the beginning of the loop.
+void IndVarSimplify::RewriteLoopExitValues(Loop *L,
+ SCEVExpander &Rewriter) {
+ // Verify the input to the pass in already in LCSSA form.
+ assert(L->isLCSSAForm(*DT));
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // Find all values that are computed inside the loop, but used outside of it.
+ // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
+ // the exit blocks of the loop to find them.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBB = ExitBlocks[i];
+
+ // If there are no PHI nodes in this exit block, then no values defined
+ // inside the loop are used on this path, skip it.
+ PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
+ if (!PN) continue;
+
+ unsigned NumPreds = PN->getNumIncomingValues();
+
+ // Iterate over all of the PHI nodes.
+ BasicBlock::iterator BBI = ExitBB->begin();
+ while ((PN = dyn_cast<PHINode>(BBI++))) {
+ if (PN->use_empty())
+ continue; // dead use, don't replace it
+
+ // SCEV only supports integer expressions for now.
+ if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
+ continue;
+
+ // It's necessary to tell ScalarEvolution about this explicitly so that
+ // it can walk the def-use list and forget all SCEVs, as it may not be
+ // watching the PHI itself. Once the new exit value is in place, there
+ // may not be a def-use connection between the loop and every instruction
+ // which got a SCEVAddRecExpr for that loop.
+ SE->forgetValue(PN);
+
+ // Iterate over all of the values in all the PHI nodes.
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ // If the value being merged in is not integer or is not defined
+ // in the loop, skip it.
+ Value *InVal = PN->getIncomingValue(i);
+ if (!isa<Instruction>(InVal))
+ continue;
+
+ // If this pred is for a subloop, not L itself, skip it.
+ if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
+ continue; // The Block is in a subloop, skip it.
+
+ // Check that InVal is defined in the loop.
+ Instruction *Inst = cast<Instruction>(InVal);
+ if (!L->contains(Inst))
+ continue;
+
+ // Okay, this instruction has a user outside of the current loop
+ // and varies predictably *inside* the loop. Evaluate the value it
+ // contains when the loop exits, if possible.
+ const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
+ if (!ExitValue->isLoopInvariant(L))
+ continue;
+
+ Changed = true;
+ ++NumReplaced;
+
+ Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
+
+ DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
+ << " LoopVal = " << *Inst << "\n");
+
+ PN->setIncomingValue(i, ExitVal);
+
+ // If this instruction is dead now, delete it.
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+
+ if (NumPreds == 1) {
+ // Completely replace a single-pred PHI. This is safe, because the
+ // NewVal won't be variant in the loop, so we don't need an LCSSA phi
+ // node anymore.
+ PN->replaceAllUsesWith(ExitVal);
+ RecursivelyDeleteTriviallyDeadInstructions(PN);
+ }
+ }
+ if (NumPreds != 1) {
+ // Clone the PHI and delete the original one. This lets IVUsers and
+ // any other maps purge the original user from their records.
+ PHINode *NewPN = cast<PHINode>(PN->clone());
+ NewPN->takeName(PN);
+ NewPN->insertBefore(PN);
+ PN->replaceAllUsesWith(NewPN);
+ PN->eraseFromParent();
+ }
+ }
+ }
+
+ // The insertion point instruction may have been deleted; clear it out
+ // so that the rewriter doesn't trip over it later.
+ Rewriter.clearInsertPoint();
+}
+
+void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
+ // First step. Check to see if there are any floating-point recurrences.
+ // If there are, change them into integer recurrences, permitting analysis by
+ // the SCEV routines.
+ //
+ BasicBlock *Header = L->getHeader();
+
+ SmallVector<WeakVH, 8> PHIs;
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i]))
+ HandleFloatingPointIV(L, PN);
+
+ // If the loop previously had floating-point IV, ScalarEvolution
+ // may not have been able to compute a trip count. Now that we've done some
+ // re-writing, the trip count may be computable.
+ if (Changed)
+ SE->forgetLoop(L);
+}
+
+void IndVarSimplify::EliminateIVComparisons() {
+ SmallVector<WeakVH, 16> DeadInsts;
+
+ // Look for ICmp users.
+ for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
+ IVStrideUse &UI = *I;
+ ICmpInst *ICmp = dyn_cast<ICmpInst>(UI.getUser());
+ if (!ICmp) continue;
+
+ bool Swapped = UI.getOperandValToReplace() == ICmp->getOperand(1);
+ ICmpInst::Predicate Pred = ICmp->getPredicate();
+ if (Swapped) Pred = ICmpInst::getSwappedPredicate(Pred);
+
+ // Get the SCEVs for the ICmp operands.
+ const SCEV *S = IU->getReplacementExpr(UI);
+ const SCEV *X = SE->getSCEV(ICmp->getOperand(!Swapped));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // If the condition is always true or always false, replace it with
+ // a constant value.
+ if (SE->isKnownPredicate(Pred, S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
+ else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
+ else
+ continue;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ DeadInsts.push_back(ICmp);
+ }
+
+ // Now that we're done iterating through lists, clean up any instructions
+ // which are now dead.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+}
+
+void IndVarSimplify::EliminateIVRemainders() {
+ SmallVector<WeakVH, 16> DeadInsts;
+
+ // Look for SRem and URem users.
+ for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
+ IVStrideUse &UI = *I;
+ BinaryOperator *Rem = dyn_cast<BinaryOperator>(UI.getUser());
+ if (!Rem) continue;
+
+ bool isSigned = Rem->getOpcode() == Instruction::SRem;
+ if (!isSigned && Rem->getOpcode() != Instruction::URem)
+ continue;
+
+ // We're only interested in the case where we know something about
+ // the numerator.
+ if (UI.getOperandValToReplace() != Rem->getOperand(0))
+ continue;
+
+ // Get the SCEVs for the ICmp operands.
+ const SCEV *S = SE->getSCEV(Rem->getOperand(0));
+ const SCEV *X = SE->getSCEV(Rem->getOperand(1));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // i % n --> i if i is in [0,n).
+ if ((!isSigned || SE->isKnownNonNegative(S)) &&
+ SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ S, X))
+ Rem->replaceAllUsesWith(Rem->getOperand(0));
+ else {
+ // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
+ const SCEV *LessOne =
+ SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
+ if ((!isSigned || SE->isKnownNonNegative(LessOne)) &&
+ SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ LessOne, X)) {
+ ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
+ Rem->getOperand(0), Rem->getOperand(1),
+ "tmp");
+ SelectInst *Sel =
+ SelectInst::Create(ICmp,
+ ConstantInt::get(Rem->getType(), 0),
+ Rem->getOperand(0), "tmp", Rem);
+ Rem->replaceAllUsesWith(Sel);
+ } else
+ continue;
+ }
+
+ // Inform IVUsers about the new users.
+ if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
+ IU->AddUsersIfInteresting(I);
+
+ DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ DeadInsts.push_back(Rem);
+ }
+
+ // Now that we're done iterating through lists, clean up any instructions
+ // which are now dead.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+}
+
+bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ // If LoopSimplify form is not available, stay out of trouble. Some notes:
+ // - LSR currently only supports LoopSimplify-form loops. Indvars'
+ // canonicalization can be a pessimization without LSR to "clean up"
+ // afterwards.
+ // - We depend on having a preheader; in particular,
+ // Loop::getCanonicalInductionVariable only supports loops with preheaders,
+ // and we're in trouble if we can't find the induction variable even when
+ // we've manually inserted one.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ IU = &getAnalysis<IVUsers>();
+ LI = &getAnalysis<LoopInfo>();
+ SE = &getAnalysis<ScalarEvolution>();
+ DT = &getAnalysis<DominatorTree>();
+ Changed = false;
+
+ // If there are any floating-point recurrences, attempt to
+ // transform them to use integer recurrences.
+ RewriteNonIntegerIVs(L);
+
+ BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+
+ // Create a rewriter object which we'll use to transform the code with.
+ SCEVExpander Rewriter(*SE);
+
+ // Check to see if this loop has a computable loop-invariant execution count.
+ // If so, this means that we can compute the final value of any expressions
+ // that are recurrent in the loop, and substitute the exit values from the
+ // loop into any instructions outside of the loop that use the final values of
+ // the current expressions.
+ //
+ if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ RewriteLoopExitValues(L, Rewriter);
+
+ // Simplify ICmp IV users.
+ EliminateIVComparisons();
+
+ // Simplify SRem and URem IV users.
+ EliminateIVRemainders();
+
+ // Compute the type of the largest recurrence expression, and decide whether
+ // a canonical induction variable should be inserted.
+ const Type *LargestType = 0;
+ bool NeedCannIV = false;
+ if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+ LargestType = BackedgeTakenCount->getType();
+ LargestType = SE->getEffectiveSCEVType(LargestType);
+ // If we have a known trip count and a single exit block, we'll be
+ // rewriting the loop exit test condition below, which requires a
+ // canonical induction variable.
+ if (ExitingBlock)
+ NeedCannIV = true;
+ }
+ for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
+ const Type *Ty =
+ SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
+ if (!LargestType ||
+ SE->getTypeSizeInBits(Ty) >
+ SE->getTypeSizeInBits(LargestType))
+ LargestType = Ty;
+ NeedCannIV = true;
+ }
+
+ // Now that we know the largest of the induction variable expressions
+ // in this loop, insert a canonical induction variable of the largest size.
+ PHINode *IndVar = 0;
+ if (NeedCannIV) {
+ // Check to see if the loop already has any canonical-looking induction
+ // variables. If any are present and wider than the planned canonical
+ // induction variable, temporarily remove them, so that the Rewriter
+ // doesn't attempt to reuse them.
+ SmallVector<PHINode *, 2> OldCannIVs;
+ while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) {
+ if (SE->getTypeSizeInBits(OldCannIV->getType()) >
+ SE->getTypeSizeInBits(LargestType))
+ OldCannIV->removeFromParent();
+ else
+ break;
+ OldCannIVs.push_back(OldCannIV);
+ }
+
+ IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
+
+ ++NumInserted;
+ Changed = true;
+ DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n');
+
+ // Now that the official induction variable is established, reinsert
+ // any old canonical-looking variables after it so that the IR remains
+ // consistent. They will be deleted as part of the dead-PHI deletion at
+ // the end of the pass.
+ while (!OldCannIVs.empty()) {
+ PHINode *OldCannIV = OldCannIVs.pop_back_val();
+ OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI());
+ }
+ }
+
+ // If we have a trip count expression, rewrite the loop's exit condition
+ // using it. We can currently only handle loops with a single exit.
+ ICmpInst *NewICmp = 0;
+ if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
+ !BackedgeTakenCount->isZero() &&
+ ExitingBlock) {
+ assert(NeedCannIV &&
+ "LinearFunctionTestReplace requires a canonical induction variable");
+ // Can't rewrite non-branch yet.
+ if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()))
+ NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ ExitingBlock, BI, Rewriter);
+ }
+
+ // Rewrite IV-derived expressions. Clears the rewriter cache.
+ RewriteIVExpressions(L, Rewriter);
+
+ // The Rewriter may not be used from this point on.
+
+ // Loop-invariant instructions in the preheader that aren't used in the
+ // loop may be sunk below the loop to reduce register pressure.
+ SinkUnusedInvariants(L);
+
+ // For completeness, inform IVUsers of the IV use in the newly-created
+ // loop exit test instruction.
+ if (NewICmp)
+ IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
+
+ // Clean up dead instructions.
+ Changed |= DeleteDeadPHIs(L->getHeader());
+ // Check a post-condition.
+ assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!");
+ return Changed;
+}
+
+// FIXME: It is an extremely bad idea to indvar substitute anything more
+// complex than affine induction variables. Doing so will put expensive
+// polynomial evaluations inside of the loop, and the str reduction pass
+// currently can only reduce affine polynomials. For now just disable
+// indvar subst on anything more complex than an affine addrec, unless
+// it can be expanded to a trivial value.
+static bool isSafe(const SCEV *S, const Loop *L) {
+ // Loop-invariant values are safe.
+ if (S->isLoopInvariant(L)) return true;
+
+ // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
+ // to transform them into efficient code.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ return AR->isAffine();
+
+ // An add is safe it all its operands are safe.
+ if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+ for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
+ E = Commutative->op_end(); I != E; ++I)
+ if (!isSafe(*I, L)) return false;
+ return true;
+ }
+
+ // A cast is safe if its operand is.
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ return isSafe(C->getOperand(), L);
+
+ // A udiv is safe if its operands are.
+ if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
+ return isSafe(UD->getLHS(), L) &&
+ isSafe(UD->getRHS(), L);
+
+ // SCEVUnknown is always safe.
+ if (isa<SCEVUnknown>(S))
+ return true;
+
+ // Nothing else is safe.
+ return false;
+}
+
+void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
+ SmallVector<WeakVH, 16> DeadInsts;
+
+ // Rewrite all induction variable expressions in terms of the canonical
+ // induction variable.
+ //
+ // If there were induction variables of other sizes or offsets, manually
+ // add the offsets to the primary induction variable and cast, avoiding
+ // the need for the code evaluation methods to insert induction variables
+ // of different sizes.
+ for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
+ Value *Op = UI->getOperandValToReplace();
+ const Type *UseTy = Op->getType();
+ Instruction *User = UI->getUser();
+
+ // Compute the final addrec to expand into code.
+ const SCEV *AR = IU->getReplacementExpr(*UI);
+
+ // Evaluate the expression out of the loop, if possible.
+ if (!L->contains(UI->getUser())) {
+ const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
+ if (ExitVal->isLoopInvariant(L))
+ AR = ExitVal;
+ }
+
+ // FIXME: It is an extremely bad idea to indvar substitute anything more
+ // complex than affine induction variables. Doing so will put expensive
+ // polynomial evaluations inside of the loop, and the str reduction pass
+ // currently can only reduce affine polynomials. For now just disable
+ // indvar subst on anything more complex than an affine addrec, unless
+ // it can be expanded to a trivial value.
+ if (!isSafe(AR, L))
+ continue;
+
+ // Determine the insertion point for this user. By default, insert
+ // immediately before the user. The SCEVExpander class will automatically
+ // hoist loop invariants out of the loop. For PHI nodes, there may be
+ // multiple uses, so compute the nearest common dominator for the
+ // incoming blocks.
+ Instruction *InsertPt = User;
+ if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+ if (PHI->getIncomingValue(i) == Op) {
+ if (InsertPt == User)
+ InsertPt = PHI->getIncomingBlock(i)->getTerminator();
+ else
+ InsertPt =
+ DT->findNearestCommonDominator(InsertPt->getParent(),
+ PHI->getIncomingBlock(i))
+ ->getTerminator();
+ }
+
+ // Now expand it into actual Instructions and patch it into place.
+ Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
+
+ // Inform ScalarEvolution that this value is changing. The change doesn't
+ // affect its value, but it does potentially affect which use lists the
+ // value will be on after the replacement, which affects ScalarEvolution's
+ // ability to walk use lists and drop dangling pointers when a value is
+ // deleted.
+ SE->forgetValue(User);
+
+ // Patch the new value into place.
+ if (Op->hasName())
+ NewVal->takeName(Op);
+ User->replaceUsesOfWith(Op, NewVal);
+ UI->setOperandValToReplace(NewVal);
+ DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+ << " into = " << *NewVal << "\n");
+ ++NumRemoved;
+ Changed = true;
+
+ // The old value may be dead now.
+ DeadInsts.push_back(Op);
+ }
+
+ // Clear the rewriter cache, because values that are in the rewriter's cache
+ // can be deleted in the loop below, causing the AssertingVH in the cache to
+ // trigger.
+ Rewriter.clear();
+ // Now that we're done iterating through lists, clean up any instructions
+ // which are now dead.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+}
+
+/// If there's a single exit block, sink any loop-invariant values that
+/// were defined in the preheader but not used inside the loop into the
+/// exit block to reduce register pressure in the loop.
+void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
+ BasicBlock *ExitBlock = L->getExitBlock();
+ if (!ExitBlock) return;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) return;
+
+ Instruction *InsertPt = ExitBlock->getFirstNonPHI();
+ BasicBlock::iterator I = Preheader->getTerminator();
+ while (I != Preheader->begin()) {
+ --I;
+ // New instructions were inserted at the end of the preheader.
+ if (isa<PHINode>(I))
+ break;
+
+ // Don't move instructions which might have side effects, since the side
+ // effects need to complete before instructions inside the loop. Also don't
+ // move instructions which might read memory, since the loop may modify
+ // memory. Note that it's okay if the instruction might have undefined
+ // behavior: LoopSimplify guarantees that the preheader dominates the exit
+ // block.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+ continue;
+
+ // Skip debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ // Don't sink static AllocaInsts out of the entry block, which would
+ // turn them into dynamic allocas!
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (AI->isStaticAlloca())
+ continue;
+
+ // Determine if there is a use in or before the loop (direct or
+ // otherwise).
+ bool UsedInLoop = false;
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ User *U = *UI;
+ BasicBlock *UseBB = cast<Instruction>(U)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(U)) {
+ unsigned i =
+ PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+ UseBB = P->getIncomingBlock(i);
+ }
+ if (UseBB == Preheader || L->contains(UseBB)) {
+ UsedInLoop = true;
+ break;
+ }
+ }
+
+ // If there is, the def must remain in the preheader.
+ if (UsedInLoop)
+ continue;
+
+ // Otherwise, sink it to the exit block.
+ Instruction *ToMove = I;
+ bool Done = false;
+
+ if (I != Preheader->begin()) {
+ // Skip debug info intrinsics.
+ do {
+ --I;
+ } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
+
+ if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
+ Done = true;
+ } else {
+ Done = true;
+ }
+
+ ToMove->moveBefore(InsertPt);
+ if (Done) break;
+ InsertPt = ToMove;
+ }
+}
+
+/// ConvertToSInt - Convert APF to an integer, if possible.
+static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
+ bool isExact = false;
+ if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
+ return false;
+ // See if we can convert this to an int64_t
+ uint64_t UIntVal;
+ if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
+ &isExact) != APFloat::opOK || !isExact)
+ return false;
+ IntVal = UIntVal;
+ return true;
+}
+
+/// HandleFloatingPointIV - If the loop has floating induction variable
+/// then insert corresponding integer induction variable if possible.
+/// For example,
+/// for(double i = 0; i < 10000; ++i)
+/// bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+/// bar((double)i);
+///
+void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
+ unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+ unsigned BackEdge = IncomingEdge^1;
+
+ // Check incoming value.
+ ConstantFP *InitValueVal =
+ dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
+
+ int64_t InitValue;
+ if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
+ return;
+
+ // Check IV increment. Reject this PN if increment operation is not
+ // an add or increment value can not be represented by an integer.
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+ if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
+
+ // If this is not an add of the PHI with a constantfp, or if the constant fp
+ // is not an integer, bail out.
+ ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
+ int64_t IncValue;
+ if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+ !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
+ return;
+
+ // Check Incr uses. One user is PN and the other user is an exit condition
+ // used by the conditional terminator.
+ Value::use_iterator IncrUse = Incr->use_begin();
+ Instruction *U1 = cast<Instruction>(*IncrUse++);
+ if (IncrUse == Incr->use_end()) return;
+ Instruction *U2 = cast<Instruction>(*IncrUse++);
+ if (IncrUse != Incr->use_end()) return;
+
+ // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
+ // only used by a branch, we can't transform it.
+ FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
+ if (!Compare)
+ Compare = dyn_cast<FCmpInst>(U2);
+ if (Compare == 0 || !Compare->hasOneUse() ||
+ !isa<BranchInst>(Compare->use_back()))
+ return;
+
+ BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
+
+ // We need to verify that the branch actually controls the iteration count
+ // of the loop. If not, the new IV can overflow and no one will notice.
+ // The branch block must be in the loop and one of the successors must be out
+ // of the loop.
+ assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
+ if (!L->contains(TheBr->getParent()) ||
+ (L->contains(TheBr->getSuccessor(0)) &&
+ L->contains(TheBr->getSuccessor(1))))
+ return;
+
+
+ // If it isn't a comparison with an integer-as-fp (the exit value), we can't
+ // transform it.
+ ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
+ int64_t ExitValue;
+ if (ExitValueVal == 0 ||
+ !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
+ return;
+
+ // Find new predicate for integer comparison.
+ CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
+ switch (Compare->getPredicate()) {
+ default: return; // Unknown comparison.
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
+ }
+
+ // We convert the floating point induction variable to a signed i32 value if
+ // we can. This is only safe if the comparison will not overflow in a way
+ // that won't be trapped by the integer equivalent operations. Check for this
+ // now.
+ // TODO: We could use i64 if it is native and the range requires it.
+
+ // The start/stride/exit values must all fit in signed i32.
+ if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
+ return;
+
+ // If not actually striding (add x, 0.0), avoid touching the code.
+ if (IncValue == 0)
+ return;
+
+ // Positive and negative strides have different safety conditions.
+ if (IncValue > 0) {
+ // If we have a positive stride, we require the init to be less than the
+ // exit value and an equality or less than comparison.
+ if (InitValue >= ExitValue ||
+ NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
+ return;
+
+ uint32_t Range = uint32_t(ExitValue-InitValue);
+ if (NewPred == CmpInst::ICMP_SLE) {
+ // Normalize SLE -> SLT, check for infinite loop.
+ if (++Range == 0) return; // Range overflows.
+ }
+
+ unsigned Leftover = Range % uint32_t(IncValue);
+
+ // If this is an equality comparison, we require that the strided value
+ // exactly land on the exit value, otherwise the IV condition will wrap
+ // around and do things the fp IV wouldn't.
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+ Leftover != 0)
+ return;
+
+ // If the stride would wrap around the i32 before exiting, we can't
+ // transform the IV.
+ if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
+ return;
+
+ } else {
+ // If we have a negative stride, we require the init to be greater than the
+ // exit value and an equality or greater than comparison.
+ if (InitValue >= ExitValue ||
+ NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
+ return;
+
+ uint32_t Range = uint32_t(InitValue-ExitValue);
+ if (NewPred == CmpInst::ICMP_SGE) {
+ // Normalize SGE -> SGT, check for infinite loop.
+ if (++Range == 0) return; // Range overflows.
+ }
+
+ unsigned Leftover = Range % uint32_t(-IncValue);
+
+ // If this is an equality comparison, we require that the strided value
+ // exactly land on the exit value, otherwise the IV condition will wrap
+ // around and do things the fp IV wouldn't.
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+ Leftover != 0)
+ return;
+
+ // If the stride would wrap around the i32 before exiting, we can't
+ // transform the IV.
+ if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
+ return;
+ }
+
+ const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
+
+ // Insert new integer induction variable.
+ PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN);
+ NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
+ PN->getIncomingBlock(IncomingEdge));
+
+ Value *NewAdd =
+ BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
+ Incr->getName()+".int", Incr);
+ NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
+
+ ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
+ ConstantInt::get(Int32Ty, ExitValue),
+ Compare->getName());
+
+ // In the following deletions, PN may become dead and may be deleted.
+ // Use a WeakVH to observe whether this happens.
+ WeakVH WeakPH = PN;
+
+ // Delete the old floating point exit comparison. The branch starts using the
+ // new comparison.
+ NewCompare->takeName(Compare);
+ Compare->replaceAllUsesWith(NewCompare);
+ RecursivelyDeleteTriviallyDeadInstructions(Compare);
+
+ // Delete the old floating point increment.
+ Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
+ RecursivelyDeleteTriviallyDeadInstructions(Incr);
+
+ // If the FP induction variable still has uses, this is because something else
+ // in the loop uses its value. In order to canonicalize the induction
+ // variable, we chose to eliminate the IV and rewrite it in terms of an
+ // int->fp cast.
+ //
+ // We give preference to sitofp over uitofp because it is faster on most
+ // platforms.
+ if (WeakPH) {
+ Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
+ PN->getParent()->getFirstNonPHI());
+ PN->replaceAllUsesWith(Conv);
+ RecursivelyDeleteTriviallyDeadInstructions(PN);
+ }
+
+ // Add a new IVUsers entry for the newly-created integer PHI.
+ IU->AddUsersIfInteresting(NewPHI);
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
new file mode 100644
index 0000000..104d5ae
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -0,0 +1,1706 @@
+//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Jump Threading pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jump-threading"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumThreads, "Number of jumps threaded");
+STATISTIC(NumFolds, "Number of terminators folded");
+STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
+
+static cl::opt<unsigned>
+Threshold("jump-threading-threshold",
+ cl::desc("Max block size to duplicate for jump threading"),
+ cl::init(6), cl::Hidden);
+
+// Turn on use of LazyValueInfo.
+static cl::opt<bool>
+EnableLVI("enable-jump-threading-lvi",
+ cl::desc("Use LVI for jump threading"),
+ cl::init(true),
+ cl::ReallyHidden);
+
+
+
+namespace {
+ /// This pass performs 'jump threading', which looks at blocks that have
+ /// multiple predecessors and multiple successors. If one or more of the
+ /// predecessors of the block can be proven to always jump to one of the
+ /// successors, we forward the edge from the predecessor to the successor by
+ /// duplicating the contents of this block.
+ ///
+ /// An example of when this can occur is code like this:
+ ///
+ /// if () { ...
+ /// X = 4;
+ /// }
+ /// if (X < 3) {
+ ///
+ /// In this case, the unconditional branch at the end of the first if can be
+ /// revectored to the false side of the second if.
+ ///
+ class JumpThreading : public FunctionPass {
+ TargetData *TD;
+ LazyValueInfo *LVI;
+#ifdef NDEBUG
+ SmallPtrSet<BasicBlock*, 16> LoopHeaders;
+#else
+ SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
+#endif
+ DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
+
+ // RAII helper for updating the recursion stack.
+ struct RecursionSetRemover {
+ DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
+ std::pair<Value*, BasicBlock*> ThePair;
+
+ RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
+ std::pair<Value*, BasicBlock*> P)
+ : TheSet(S), ThePair(P) { }
+
+ ~RecursionSetRemover() {
+ TheSet.erase(ThePair);
+ }
+ };
+ public:
+ static char ID; // Pass identification
+ JumpThreading() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ if (EnableLVI) {
+ AU.addRequired<LazyValueInfo>();
+ AU.addPreserved<LazyValueInfo>();
+ }
+ }
+
+ void FindLoopHeaders(Function &F);
+ bool ProcessBlock(BasicBlock *BB);
+ bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
+ BasicBlock *SuccBB);
+ bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &PredBBs);
+
+ typedef SmallVectorImpl<std::pair<ConstantInt*,
+ BasicBlock*> > PredValueInfo;
+
+ bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
+ PredValueInfo &Result);
+ bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB);
+
+
+ bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
+ bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
+
+ bool ProcessBranchOnPHI(PHINode *PN);
+ bool ProcessBranchOnXOR(BinaryOperator *BO);
+
+ bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
+ };
+}
+
+char JumpThreading::ID = 0;
+INITIALIZE_PASS(JumpThreading, "jump-threading",
+ "Jump Threading", false, false);
+
+// Public interface to the Jump Threading pass
+FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
+
+/// runOnFunction - Top level algorithm.
+///
+bool JumpThreading::runOnFunction(Function &F) {
+ DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
+ TD = getAnalysisIfAvailable<TargetData>();
+ LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0;
+
+ FindLoopHeaders(F);
+
+ bool Changed, EverChanged = false;
+ do {
+ Changed = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+ BasicBlock *BB = I;
+ // Thread all of the branches we can over this block.
+ while (ProcessBlock(BB))
+ Changed = true;
+
+ ++I;
+
+ // If the block is trivially dead, zap it. This eliminates the successor
+ // edges which simplifies the CFG.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
+ << "' with terminator: " << *BB->getTerminator() << '\n');
+ LoopHeaders.erase(BB);
+ if (LVI) LVI->eraseBlock(BB);
+ DeleteDeadBlock(BB);
+ Changed = true;
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ // Can't thread an unconditional jump, but if the block is "almost
+ // empty", we can replace uses of it with uses of the successor and make
+ // this dead.
+ if (BI->isUnconditional() &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ BasicBlock::iterator BBI = BB->getFirstNonPHI();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ // If the terminator is the only non-phi instruction, try to nuke it.
+ if (BBI->isTerminator()) {
+ // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
+ // block, we have to make sure it isn't in the LoopHeaders set. We
+ // reinsert afterward if needed.
+ bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+ BasicBlock *Succ = BI->getSuccessor(0);
+
+ // FIXME: It is always conservatively correct to drop the info
+ // for a block even if it doesn't get erased. This isn't totally
+ // awesome, but it allows us to use AssertingVH to prevent nasty
+ // dangling pointer issues within LazyValueInfo.
+ if (LVI) LVI->eraseBlock(BB);
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
+ Changed = true;
+ // If we deleted BB and BB was the header of a loop, then the
+ // successor is now the header of the loop.
+ BB = Succ;
+ }
+
+ if (ErasedFromLoopHeaders)
+ LoopHeaders.insert(BB);
+ }
+ }
+ }
+ }
+ EverChanged |= Changed;
+ } while (Changed);
+
+ LoopHeaders.clear();
+ return EverChanged;
+}
+
+/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
+/// thread across it.
+static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
+ /// Ignore PHI nodes, these will be flattened when duplication happens.
+ BasicBlock::const_iterator I = BB->getFirstNonPHI();
+
+ // FIXME: THREADING will delete values that are just used to compute the
+ // branch, so they shouldn't count against the duplication cost.
+
+
+ // Sum up the cost of each instruction until we get to the terminator. Don't
+ // include the terminator because the copy won't include it.
+ unsigned Size = 0;
+ for (; !isa<TerminatorInst>(I); ++I) {
+ // Debugger intrinsics don't incur code size.
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+
+ // If this is a pointer->pointer bitcast, it is free.
+ if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
+ continue;
+
+ // All other instructions count for at least one unit.
+ ++Size;
+
+ // Calls are more expensive. If they are non-intrinsic calls, we model them
+ // as having cost of 4. If they are a non-vector intrinsic, we model them
+ // as having cost of 2 total, and if they are a vector intrinsic, we model
+ // them as having cost 1.
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!isa<IntrinsicInst>(CI))
+ Size += 3;
+ else if (!CI->getType()->isVectorTy())
+ Size += 1;
+ }
+ }
+
+ // Threading through a switch statement is particularly profitable. If this
+ // block ends in a switch, decrease its cost to make it more likely to happen.
+ if (isa<SwitchInst>(I))
+ Size = Size > 6 ? Size-6 : 0;
+
+ return Size;
+}
+
+/// FindLoopHeaders - We do not want jump threading to turn proper loop
+/// structures into irreducible loops. Doing this breaks up the loop nesting
+/// hierarchy and pessimizes later transformations. To prevent this from
+/// happening, we first have to find the loop headers. Here we approximate this
+/// by finding targets of backedges in the CFG.
+///
+/// Note that there definitely are cases when we want to allow threading of
+/// edges across a loop header. For example, threading a jump from outside the
+/// loop (the preheader) to an exit block of the loop is definitely profitable.
+/// It is also almost always profitable to thread backedges from within the loop
+/// to exit blocks, and is often profitable to thread backedges to other blocks
+/// within the loop (forming a nested loop). This simple analysis is not rich
+/// enough to track all of these properties and keep it up-to-date as the CFG
+/// mutates, so we don't allow any of these transformations.
+///
+void JumpThreading::FindLoopHeaders(Function &F) {
+ SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
+ FindFunctionBackedges(F, Edges);
+
+ for (unsigned i = 0, e = Edges.size(); i != e; ++i)
+ LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
+}
+
+// Helper method for ComputeValueKnownInPredecessors. If Value is a
+// ConstantInt, push it. If it's an undef, push 0. Otherwise, do nothing.
+static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*,
+ BasicBlock*> > &Result,
+ Constant *Value, BasicBlock* BB){
+ if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value))
+ Result.push_back(std::make_pair(FoldedCInt, BB));
+ else if (isa<UndefValue>(Value))
+ Result.push_back(std::make_pair((ConstantInt*)0, BB));
+}
+
+/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
+/// if we can infer that the value is a known ConstantInt in any of our
+/// predecessors. If so, return the known list of value and pred BB in the
+/// result vector. If a value is known to be undef, it is returned as null.
+///
+/// This returns true if there were any known values.
+///
+bool JumpThreading::
+ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+ // This method walks up use-def chains recursively. Because of this, we could
+ // get into an infinite loop going around loops in the use-def chain. To
+ // prevent this, keep track of what (value, block) pairs we've already visited
+ // and terminate the search if we loop back to them
+ if (!RecursionSet.insert(std::make_pair(V, BB)).second)
+ return false;
+
+ // An RAII help to remove this pair from the recursion set once the recursion
+ // stack pops back out again.
+ RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
+
+ // If V is a constantint, then it is known in all predecessors.
+ if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(CI, *PI));
+
+ return true;
+ }
+
+ // If V is a non-instruction value, or an instruction in a different block,
+ // then it can't be derived from a PHI.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || I->getParent() != BB) {
+
+ // Okay, if this is a live-in value, see if it has a known value at the end
+ // of any of our predecessors.
+ //
+ // FIXME: This should be an edge property, not a block end property.
+ /// TODO: Per PR2563, we could infer value range information about a
+ /// predecessor based on its terminator.
+ //
+ if (LVI) {
+ // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
+ // "I" is a non-local compare-with-a-constant instruction. This would be
+ // able to handle value inequalities better, for example if the compare is
+ // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+ // Perhaps getConstantOnEdge should be smart enough to do this?
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
+ if (PredCst == 0 ||
+ (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
+ continue;
+
+ Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P));
+ }
+
+ return !Result.empty();
+ }
+
+ return false;
+ }
+
+ /// If I is a PHI node, then we know the incoming values for any constants.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
+ Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
+ } else if (LVI) {
+ Constant *CI = LVI->getConstantOnEdge(InVal,
+ PN->getIncomingBlock(i), BB);
+ // LVI returns null is no value could be determined.
+ if (!CI) continue;
+ PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i));
+ }
+ }
+
+ return !Result.empty();
+ }
+
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals, RHSVals;
+
+ // Handle some boolean conditions.
+ if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ // X | true -> true
+ // X & false -> false
+ if (I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::And) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
+ ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals);
+
+ if (LHSVals.empty() && RHSVals.empty())
+ return false;
+
+ ConstantInt *InterestingVal;
+ if (I->getOpcode() == Instruction::Or)
+ InterestingVal = ConstantInt::getTrue(I->getContext());
+ else
+ InterestingVal = ConstantInt::getFalse(I->getContext());
+
+ SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
+
+ // Scan for the sentinel. If we find an undef, force it to the
+ // interesting value: x|undef -> true and x&undef -> false.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
+ if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) {
+ Result.push_back(LHSVals[i]);
+ Result.back().first = InterestingVal;
+ LHSKnownBBs.insert(LHSVals[i].second);
+ }
+ for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
+ if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
+ // If we already inferred a value for this block on the LHS, don't
+ // re-add it.
+ if (!LHSKnownBBs.count(RHSVals[i].second)) {
+ Result.push_back(RHSVals[i]);
+ Result.back().first = InterestingVal;
+ }
+ }
+
+ return !Result.empty();
+ }
+
+ // Handle the NOT form of XOR.
+ if (I->getOpcode() == Instruction::Xor &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ cast<ConstantInt>(I->getOperand(1))->isOne()) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result);
+ if (Result.empty())
+ return false;
+
+ // Invert the known values.
+ for (unsigned i = 0, e = Result.size(); i != e; ++i)
+ if (Result[i].first)
+ Result[i].first =
+ cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
+
+ return true;
+ }
+
+ // Try to simplify some other binary operator values.
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
+ ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals);
+
+ // Try to use constant folding to simplify the binary operator.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first ? LHSVals[i].first :
+ cast<Constant>(UndefValue::get(BO->getType()));
+ Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
+
+ PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+ }
+ }
+
+ return !Result.empty();
+ }
+
+ // Handle compare with phi operand, where the PHI is defined in this block.
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
+ if (PN && PN->getParent() == BB) {
+ // We can do this simplification if any comparisons fold to true or false.
+ // See if any do.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ Value *LHS = PN->getIncomingValue(i);
+ Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
+
+ Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
+ if (Res == 0) {
+ if (!LVI || !isa<Constant>(RHS))
+ continue;
+
+ LazyValueInfo::Tristate
+ ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
+ cast<Constant>(RHS), PredBB, BB);
+ if (ResT == LazyValueInfo::Unknown)
+ continue;
+ Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
+ }
+
+ if (Constant *ConstRes = dyn_cast<Constant>(Res))
+ PushConstantIntOrUndef(Result, ConstRes, PredBB);
+ }
+
+ return !Result.empty();
+ }
+
+
+ // If comparing a live-in value against a constant, see if we know the
+ // live-in value on any predecessors.
+ if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
+ Cmp->getType()->isIntegerTy()) {
+ if (!isa<Instruction>(Cmp->getOperand(0)) ||
+ cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
+ Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ LazyValueInfo::Tristate Res =
+ LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, P, BB);
+ if (Res == LazyValueInfo::Unknown)
+ continue;
+
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
+ }
+
+ return !Result.empty();
+ }
+
+ // Try to find a constant value for the LHS of a comparison,
+ // and evaluate it statically if we can.
+ if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
+
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first ? LHSVals[i].first :
+ cast<Constant>(UndefValue::get(CmpConst->getType()));
+ Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
+ V, CmpConst);
+ PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+ }
+
+ return !Result.empty();
+ }
+ }
+ }
+
+ if (LVI) {
+ // If all else fails, see if LVI can figure out a constant value for us.
+ Constant *CI = LVI->getConstant(V, BB);
+ ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI);
+ if (CInt) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(CInt, *PI));
+ }
+
+ return !Result.empty();
+ }
+
+ return false;
+}
+
+
+
+/// GetBestDestForBranchOnUndef - If we determine that the specified block ends
+/// in an undefined jump, decide which block is best to revector to.
+///
+/// Since we can pick an arbitrary destination, we pick the successor with the
+/// fewest predecessors. This should reduce the in-degree of the others.
+///
+static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
+ TerminatorInst *BBTerm = BB->getTerminator();
+ unsigned MinSucc = 0;
+ BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
+ // Compute the successor with the minimum number of predecessors.
+ unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+ for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
+ TestBB = BBTerm->getSuccessor(i);
+ unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+ if (NumPreds < MinNumPreds)
+ MinSucc = i;
+ }
+
+ return MinSucc;
+}
+
+/// ProcessBlock - If there are any predecessors whose control can be threaded
+/// through to a successor, transform them now.
+bool JumpThreading::ProcessBlock(BasicBlock *BB) {
+ // If the block is trivially dead, just return and let the caller nuke it.
+ // This simplifies other transformations.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock())
+ return false;
+
+ // If this block has a single predecessor, and if that pred has a single
+ // successor, merge the blocks. This encourages recursive jump threading
+ // because now the condition in this block can be threaded through
+ // predecessors of our predecessor block.
+ if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
+ if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
+ SinglePred != BB) {
+ // If SinglePred was a loop header, BB becomes one.
+ if (LoopHeaders.erase(SinglePred))
+ LoopHeaders.insert(BB);
+
+ // Remember if SinglePred was the entry block of the function. If so, we
+ // will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ if (LVI) LVI->eraseBlock(SinglePred);
+ MergeBasicBlockIntoOnlyPred(BB);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+ return true;
+ }
+ }
+
+ // Look to see if the terminator is a branch of switch, if not we can't thread
+ // it.
+ Value *Condition;
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ // Can't thread an unconditional jump.
+ if (BI->isUnconditional()) return false;
+ Condition = BI->getCondition();
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+ Condition = SI->getCondition();
+ else
+ return false; // Must be an invoke.
+
+ // If the terminator of this block is branching on a constant, simplify the
+ // terminator to an unconditional branch. This can occur due to threading in
+ // other blocks.
+ if (isa<ConstantInt>(Condition)) {
+ DEBUG(dbgs() << " In block '" << BB->getName()
+ << "' folding terminator: " << *BB->getTerminator() << '\n');
+ ++NumFolds;
+ ConstantFoldTerminator(BB);
+ return true;
+ }
+
+ // If the terminator is branching on an undef, we can pick any of the
+ // successors to branch to. Let GetBestDestForJumpOnUndef decide.
+ if (isa<UndefValue>(Condition)) {
+ unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
+
+ // Fold the branch/switch.
+ TerminatorInst *BBTerm = BB->getTerminator();
+ for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
+ if (i == BestSucc) continue;
+ RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
+ }
+
+ DEBUG(dbgs() << " In block '" << BB->getName()
+ << "' folding undef terminator: " << *BBTerm << '\n');
+ BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
+ BBTerm->eraseFromParent();
+ return true;
+ }
+
+ Instruction *CondInst = dyn_cast<Instruction>(Condition);
+
+ // If the condition is an instruction defined in another block, see if a
+ // predecessor has the same condition:
+ // br COND, BBX, BBY
+ // BBX:
+ // br COND, BBZ, BBW
+ if (!LVI &&
+ !Condition->hasOneUse() && // Multiple uses.
+ (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
+ pred_iterator PI = pred_begin(BB), E = pred_end(BB);
+ if (isa<BranchInst>(BB->getTerminator())) {
+ for (; PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
+ if (PBI->isConditional() && PBI->getCondition() == Condition &&
+ ProcessBranchOnDuplicateCond(P, BB))
+ return true;
+ }
+ } else {
+ assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator");
+ for (; PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ if (SwitchInst *PSI = dyn_cast<SwitchInst>(P->getTerminator()))
+ if (PSI->getCondition() == Condition &&
+ ProcessSwitchOnDuplicateCond(P, BB))
+ return true;
+ }
+ }
+ }
+
+ // All the rest of our checks depend on the condition being an instruction.
+ if (CondInst == 0) {
+ // FIXME: Unify this with code below.
+ if (LVI && ProcessThreadableEdges(Condition, BB))
+ return true;
+ return false;
+ }
+
+
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
+ if (!LVI &&
+ (!isa<PHINode>(CondCmp->getOperand(0)) ||
+ cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) {
+ // If we have a comparison, loop over the predecessors to see if there is
+ // a condition with a lexically identical value.
+ pred_iterator PI = pred_begin(BB), E = pred_end(BB);
+ for (; PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
+ if (PBI->isConditional() && P != BB) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
+ if (CI->getOperand(0) == CondCmp->getOperand(0) &&
+ CI->getOperand(1) == CondCmp->getOperand(1) &&
+ CI->getPredicate() == CondCmp->getPredicate()) {
+ // TODO: Could handle things like (x != 4) --> (x == 17)
+ if (ProcessBranchOnDuplicateCond(P, BB))
+ return true;
+ }
+ }
+ }
+ }
+ }
+
+ // For a comparison where the LHS is outside this block, it's possible
+ // that we've branched on it before. Used LVI to see if we can simplify
+ // the branch based on that.
+ BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+ Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (LVI && CondBr && CondConst && CondBr->isConditional() && PI != PE &&
+ (!isa<Instruction>(CondCmp->getOperand(0)) ||
+ cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) {
+ // For predecessor edge, determine if the comparison is true or false
+ // on that edge. If they're all true or all false, we can simplify the
+ // branch.
+ // FIXME: We could handle mixed true/false by duplicating code.
+ LazyValueInfo::Tristate Baseline =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
+ CondConst, *PI, BB);
+ if (Baseline != LazyValueInfo::Unknown) {
+ // Check that all remaining incoming values match the first one.
+ while (++PI != PE) {
+ LazyValueInfo::Tristate Ret = LVI->getPredicateOnEdge(
+ CondCmp->getPredicate(),
+ CondCmp->getOperand(0),
+ CondConst, *PI, BB);
+ if (Ret != Baseline) break;
+ }
+
+ // If we terminated early, then one of the values didn't match.
+ if (PI == PE) {
+ unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0;
+ unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1;
+ RemovePredecessorAndSimplify(CondBr->getSuccessor(ToRemove), BB, TD);
+ BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+ CondBr->eraseFromParent();
+ return true;
+ }
+ }
+ }
+ }
+
+ // Check for some cases that are worth simplifying. Right now we want to look
+ // for loads that are used by a switch or by the condition for the branch. If
+ // we see one, check to see if it's partially redundant. If so, insert a PHI
+ // which can then be used to thread the values.
+ //
+ Value *SimplifyValue = CondInst;
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
+ if (isa<Constant>(CondCmp->getOperand(1)))
+ SimplifyValue = CondCmp->getOperand(0);
+
+ // TODO: There are other places where load PRE would be profitable, such as
+ // more complex comparisons.
+ if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
+ if (SimplifyPartiallyRedundantLoad(LI))
+ return true;
+
+
+ // Handle a variety of cases where we are branching on something derived from
+ // a PHI node in the current block. If we can prove that any predecessors
+ // compute a predictable value based on a PHI node, thread those predecessors.
+ //
+ if (ProcessThreadableEdges(CondInst, BB))
+ return true;
+
+ // If this is an otherwise-unfoldable branch on a phi node in the current
+ // block, see if we can simplify.
+ if (PHINode *PN = dyn_cast<PHINode>(CondInst))
+ if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return ProcessBranchOnPHI(PN);
+
+
+ // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
+ if (CondInst->getOpcode() == Instruction::Xor &&
+ CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
+
+
+ // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
+ // "(X == 4)", thread through this block.
+
+ return false;
+}
+
+/// ProcessBranchOnDuplicateCond - We found a block and a predecessor of that
+/// block that jump on exactly the same condition. This means that we almost
+/// always know the direction of the edge in the DESTBB:
+/// PREDBB:
+/// br COND, DESTBB, BBY
+/// DESTBB:
+/// br COND, BBZ, BBW
+///
+/// If DESTBB has multiple predecessors, we can't just constant fold the branch
+/// in DESTBB, we have to thread over it.
+bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
+ BasicBlock *BB) {
+ BranchInst *PredBI = cast<BranchInst>(PredBB->getTerminator());
+
+ // If both successors of PredBB go to DESTBB, we don't know anything. We can
+ // fold the branch to an unconditional one, which allows other recursive
+ // simplifications.
+ bool BranchDir;
+ if (PredBI->getSuccessor(1) != BB)
+ BranchDir = true;
+ else if (PredBI->getSuccessor(0) != BB)
+ BranchDir = false;
+ else {
+ DEBUG(dbgs() << " In block '" << PredBB->getName()
+ << "' folding terminator: " << *PredBB->getTerminator() << '\n');
+ ++NumFolds;
+ ConstantFoldTerminator(PredBB);
+ return true;
+ }
+
+ BranchInst *DestBI = cast<BranchInst>(BB->getTerminator());
+
+ // If the dest block has one predecessor, just fix the branch condition to a
+ // constant and fold it.
+ if (BB->getSinglePredecessor()) {
+ DEBUG(dbgs() << " In block '" << BB->getName()
+ << "' folding condition to '" << BranchDir << "': "
+ << *BB->getTerminator() << '\n');
+ ++NumFolds;
+ Value *OldCond = DestBI->getCondition();
+ DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ BranchDir));
+ // Delete dead instructions before we fold the branch. Folding the branch
+ // can eliminate edges from the CFG which can end up deleting OldCond.
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ ConstantFoldTerminator(BB);
+ return true;
+ }
+
+
+ // Next, figure out which successor we are threading to.
+ BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir);
+
+ SmallVector<BasicBlock*, 2> Preds;
+ Preds.push_back(PredBB);
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, Preds, SuccBB);
+}
+
+/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that
+/// block that switch on exactly the same condition. This means that we almost
+/// always know the direction of the edge in the DESTBB:
+/// PREDBB:
+/// switch COND [... DESTBB, BBY ... ]
+/// DESTBB:
+/// switch COND [... BBZ, BBW ]
+///
+/// Optimizing switches like this is very important, because simplifycfg builds
+/// switches out of repeated 'if' conditions.
+bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
+ BasicBlock *DestBB) {
+ // Can't thread edge to self.
+ if (PredBB == DestBB)
+ return false;
+
+ SwitchInst *PredSI = cast<SwitchInst>(PredBB->getTerminator());
+ SwitchInst *DestSI = cast<SwitchInst>(DestBB->getTerminator());
+
+ // There are a variety of optimizations that we can potentially do on these
+ // blocks: we order them from most to least preferable.
+
+ // If DESTBB *just* contains the switch, then we can forward edges from PREDBB
+ // directly to their destination. This does not introduce *any* code size
+ // growth. Skip debug info first.
+ BasicBlock::iterator BBI = DestBB->begin();
+ while (isa<DbgInfoIntrinsic>(BBI))
+ BBI++;
+
+ // FIXME: Thread if it just contains a PHI.
+ if (isa<SwitchInst>(BBI)) {
+ bool MadeChange = false;
+ // Ignore the default edge for now.
+ for (unsigned i = 1, e = DestSI->getNumSuccessors(); i != e; ++i) {
+ ConstantInt *DestVal = DestSI->getCaseValue(i);
+ BasicBlock *DestSucc = DestSI->getSuccessor(i);
+
+ // Okay, DestSI has a case for 'DestVal' that goes to 'DestSucc'. See if
+ // PredSI has an explicit case for it. If so, forward. If it is covered
+ // by the default case, we can't update PredSI.
+ unsigned PredCase = PredSI->findCaseValue(DestVal);
+ if (PredCase == 0) continue;
+
+ // If PredSI doesn't go to DestBB on this value, then it won't reach the
+ // case on this condition.
+ if (PredSI->getSuccessor(PredCase) != DestBB &&
+ DestSI->getSuccessor(i) != DestBB)
+ continue;
+
+ // Do not forward this if it already goes to this destination, this would
+ // be an infinite loop.
+ if (PredSI->getSuccessor(PredCase) == DestSucc)
+ continue;
+
+ // Otherwise, we're safe to make the change. Make sure that the edge from
+ // DestSI to DestSucc is not critical and has no PHI nodes.
+ DEBUG(dbgs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI);
+ DEBUG(dbgs() << "THROUGH: " << *DestSI);
+
+ // If the destination has PHI nodes, just split the edge for updating
+ // simplicity.
+ if (isa<PHINode>(DestSucc->begin()) && !DestSucc->getSinglePredecessor()){
+ SplitCriticalEdge(DestSI, i, this);
+ DestSucc = DestSI->getSuccessor(i);
+ }
+ FoldSingleEntryPHINodes(DestSucc);
+ PredSI->setSuccessor(PredCase, DestSucc);
+ MadeChange = true;
+ }
+
+ if (MadeChange)
+ return true;
+ }
+
+ return false;
+}
+
+
+/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
+/// load instruction, eliminate it by replacing it with a PHI node. This is an
+/// important optimization that encourages jump threading, and needs to be run
+/// interlaced with other jump threading tasks.
+bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
+ // Don't hack volatile loads.
+ if (LI->isVolatile()) return false;
+
+ // If the load is defined in a block with exactly one predecessor, it can't be
+ // partially redundant.
+ BasicBlock *LoadBB = LI->getParent();
+ if (LoadBB->getSinglePredecessor())
+ return false;
+
+ Value *LoadedPtr = LI->getOperand(0);
+
+ // If the loaded operand is defined in the LoadBB, it can't be available.
+ // TODO: Could do simple PHI translation, that would be fun :)
+ if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
+ if (PtrOp->getParent() == LoadBB)
+ return false;
+
+ // Scan a few instructions up from the load, to see if it is obviously live at
+ // the entry to its block.
+ BasicBlock::iterator BBIt = LI;
+
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
+ // If the value if the load is locally available within the block, just use
+ // it. This frequently occurs for reg2mem'd allocas.
+ //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
+
+ // If the returned value is the load itself, replace with an undef. This can
+ // only happen in dead loops.
+ if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
+ LI->replaceAllUsesWith(AvailableVal);
+ LI->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, if we scanned the whole block and got to the top of the block,
+ // we know the block is locally transparent to the load. If not, something
+ // might clobber its value.
+ if (BBIt != LoadBB->begin())
+ return false;
+
+
+ SmallPtrSet<BasicBlock*, 8> PredsScanned;
+ typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
+ AvailablePredsTy AvailablePreds;
+ BasicBlock *OneUnavailablePred = 0;
+
+ // If we got here, the loaded value is transparent through to the start of the
+ // block. Check to see if it is available in any of the predecessor blocks.
+ for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PI != PE; ++PI) {
+ BasicBlock *PredBB = *PI;
+
+ // If we already scanned this predecessor, skip it.
+ if (!PredsScanned.insert(PredBB))
+ continue;
+
+ // Scan the predecessor to see if the value is available in the pred.
+ BBIt = PredBB->end();
+ Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6);
+ if (!PredAvailable) {
+ OneUnavailablePred = PredBB;
+ continue;
+ }
+
+ // If so, this load is partially redundant. Remember this info so that we
+ // can create a PHI node.
+ AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
+ }
+
+ // If the loaded value isn't available in any predecessor, it isn't partially
+ // redundant.
+ if (AvailablePreds.empty()) return false;
+
+ // Okay, the loaded value is available in at least one (and maybe all!)
+ // predecessors. If the value is unavailable in more than one unique
+ // predecessor, we want to insert a merge block for those common predecessors.
+ // This ensures that we only have to insert one reload, thus not increasing
+ // code size.
+ BasicBlock *UnavailablePred = 0;
+
+ // If there is exactly one predecessor where the value is unavailable, the
+ // already computed 'OneUnavailablePred' block is it. If it ends in an
+ // unconditional branch, we know that it isn't a critical edge.
+ if (PredsScanned.size() == AvailablePreds.size()+1 &&
+ OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
+ UnavailablePred = OneUnavailablePred;
+ } else if (PredsScanned.size() != AvailablePreds.size()) {
+ // Otherwise, we had multiple unavailable predecessors or we had a critical
+ // edge from the one.
+ SmallVector<BasicBlock*, 8> PredsToSplit;
+ SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
+
+ for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i)
+ AvailablePredSet.insert(AvailablePreds[i].first);
+
+ // Add all the unavailable predecessors to the PredsToSplit list.
+ for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ // If the predecessor is an indirect goto, we can't split the edge.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return false;
+
+ if (!AvailablePredSet.count(P))
+ PredsToSplit.push_back(P);
+ }
+
+ // Split them out to their own block.
+ UnavailablePred =
+ SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
+ "thread-pre-split", this);
+ }
+
+ // If the value isn't available in all predecessors, then there will be
+ // exactly one where it isn't available. Insert a load on that edge and add
+ // it to the AvailablePreds list.
+ if (UnavailablePred) {
+ assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
+ "Can't handle critical edge here!");
+ Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
+ LI->getAlignment(),
+ UnavailablePred->getTerminator());
+ AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
+ }
+
+ // Now we know that each predecessor of this block has a value in
+ // AvailablePreds, sort them for efficient access as we're walking the preds.
+ array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
+
+ // Create a PHI node at the start of the block for the PRE'd load value.
+ PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin());
+ PN->takeName(LI);
+
+ // Insert new entries into the PHI for each predecessor. A single block may
+ // have multiple entries here.
+ for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
+ ++PI) {
+ BasicBlock *P = *PI;
+ AvailablePredsTy::iterator I =
+ std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
+ std::make_pair(P, (Value*)0));
+
+ assert(I != AvailablePreds.end() && I->first == P &&
+ "Didn't find entry for predecessor!");
+
+ PN->addIncoming(I->second, I->first);
+ }
+
+ //cerr << "PRE: " << *LI << *PN << "\n";
+
+ LI->replaceAllUsesWith(PN);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+/// FindMostPopularDest - The specified list contains multiple possible
+/// threadable destinations. Pick the one that occurs the most frequently in
+/// the list.
+static BasicBlock *
+FindMostPopularDest(BasicBlock *BB,
+ const SmallVectorImpl<std::pair<BasicBlock*,
+ BasicBlock*> > &PredToDestList) {
+ assert(!PredToDestList.empty());
+
+ // Determine popularity. If there are multiple possible destinations, we
+ // explicitly choose to ignore 'undef' destinations. We prefer to thread
+ // blocks with known and real destinations to threading undef. We'll handle
+ // them later if interesting.
+ DenseMap<BasicBlock*, unsigned> DestPopularity;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second)
+ DestPopularity[PredToDestList[i].second]++;
+
+ // Find the most popular dest.
+ DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
+ BasicBlock *MostPopularDest = DPI->first;
+ unsigned Popularity = DPI->second;
+ SmallVector<BasicBlock*, 4> SamePopularity;
+
+ for (++DPI; DPI != DestPopularity.end(); ++DPI) {
+ // If the popularity of this entry isn't higher than the popularity we've
+ // seen so far, ignore it.
+ if (DPI->second < Popularity)
+ ; // ignore.
+ else if (DPI->second == Popularity) {
+ // If it is the same as what we've seen so far, keep track of it.
+ SamePopularity.push_back(DPI->first);
+ } else {
+ // If it is more popular, remember it.
+ SamePopularity.clear();
+ MostPopularDest = DPI->first;
+ Popularity = DPI->second;
+ }
+ }
+
+ // Okay, now we know the most popular destination. If there is more than
+ // destination, we need to determine one. This is arbitrary, but we need
+ // to make a deterministic decision. Pick the first one that appears in the
+ // successor list.
+ if (!SamePopularity.empty()) {
+ SamePopularity.push_back(MostPopularDest);
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0; ; ++i) {
+ assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
+
+ if (std::find(SamePopularity.begin(), SamePopularity.end(),
+ TI->getSuccessor(i)) == SamePopularity.end())
+ continue;
+
+ MostPopularDest = TI->getSuccessor(i);
+ break;
+ }
+ }
+
+ // Okay, we have finally picked the most popular destination.
+ return MostPopularDest;
+}
+
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
+ // If threading this would thread across a loop header, don't even try to
+ // thread the edge.
+ if (LoopHeaders.count(BB))
+ return false;
+
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
+ return false;
+
+ assert(!PredValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ DEBUG(dbgs() << "IN BB: " << *BB;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ dbgs() << " BB '" << BB->getName() << "': FOUND condition = ";
+ if (PredValues[i].first)
+ dbgs() << *PredValues[i].first;
+ else
+ dbgs() << "UNDEF";
+ dbgs() << " for pred '" << PredValues[i].second->getName()
+ << "'.\n";
+ });
+
+ // Decide what we want to thread through. Convert our list of known values to
+ // a list of known destinations for each pred. This also discards duplicate
+ // predecessors and keeps track of the undefined inputs (which are represented
+ // as a null dest in the PredToDestList).
+ SmallPtrSet<BasicBlock*, 16> SeenPreds;
+ SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
+
+ BasicBlock *OnlyDest = 0;
+ BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
+
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ BasicBlock *Pred = PredValues[i].second;
+ if (!SeenPreds.insert(Pred))
+ continue; // Duplicate predecessor entry.
+
+ // If the predecessor ends with an indirect goto, we can't change its
+ // destination.
+ if (isa<IndirectBrInst>(Pred->getTerminator()))
+ continue;
+
+ ConstantInt *Val = PredValues[i].first;
+
+ BasicBlock *DestBB;
+ if (Val == 0) // Undef.
+ DestBB = 0;
+ else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ DestBB = BI->getSuccessor(Val->isZero());
+ else {
+ SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
+ DestBB = SI->getSuccessor(SI->findCaseValue(Val));
+ }
+
+ // If we have exactly one destination, remember it for efficiency below.
+ if (i == 0)
+ OnlyDest = DestBB;
+ else if (OnlyDest != DestBB)
+ OnlyDest = MultipleDestSentinel;
+
+ PredToDestList.push_back(std::make_pair(Pred, DestBB));
+ }
+
+ // If all edges were unthreadable, we fail.
+ if (PredToDestList.empty())
+ return false;
+
+ // Determine which is the most common successor. If we have many inputs and
+ // this block is a switch, we want to start by threading the batch that goes
+ // to the most popular destination first. If we only know about one
+ // threadable destination (the common case) we can avoid this.
+ BasicBlock *MostPopularDest = OnlyDest;
+
+ if (MostPopularDest == MultipleDestSentinel)
+ MostPopularDest = FindMostPopularDest(BB, PredToDestList);
+
+ // Now that we know what the most popular destination is, factor all
+ // predecessors that will jump to it into a single predecessor.
+ SmallVector<BasicBlock*, 16> PredsToFactor;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second == MostPopularDest) {
+ BasicBlock *Pred = PredToDestList[i].first;
+
+ // This predecessor may be a switch or something else that has multiple
+ // edges to the block. Factor each of these edges by listing them
+ // according to # occurrences in PredsToFactor.
+ TerminatorInst *PredTI = Pred->getTerminator();
+ for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
+ if (PredTI->getSuccessor(i) == BB)
+ PredsToFactor.push_back(Pred);
+ }
+
+ // If the threadable edges are branching on an undefined value, we get to pick
+ // the destination that these predecessors should get to.
+ if (MostPopularDest == 0)
+ MostPopularDest = BB->getTerminator()->
+ getSuccessor(GetBestDestForJumpOnUndef(BB));
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, PredsToFactor, MostPopularDest);
+}
+
+/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
+/// a PHI node in the current block. See if there are any simplifications we
+/// can do based on inputs to the phi node.
+///
+bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
+ BasicBlock *BB = PN->getParent();
+
+ // TODO: We could make use of this to do it once for blocks with common PHI
+ // values.
+ SmallVector<BasicBlock*, 1> PredBBs;
+ PredBBs.resize(1);
+
+ // If any of the predecessor blocks end in an unconditional branch, we can
+ // *duplicate* the conditional branch into that block in order to further
+ // encourage jump threading and to eliminate cases where we have branch on a
+ // phi of an icmp (branch on icmp is much better).
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
+ if (PredBr->isUnconditional()) {
+ PredBBs[0] = PredBB;
+ // Try to duplicate BB into PredBB.
+ if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
+/// a xor instruction in the current block. See if there are any
+/// simplifications we can do based on inputs to the xor.
+///
+bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
+ BasicBlock *BB = BO->getParent();
+
+ // If either the LHS or RHS of the xor is a constant, don't do this
+ // optimization.
+ if (isa<ConstantInt>(BO->getOperand(0)) ||
+ isa<ConstantInt>(BO->getOperand(1)))
+ return false;
+
+ // If the first instruction in BB isn't a phi, we won't be able to infer
+ // anything special about any particular predecessor.
+ if (!isa<PHINode>(BB->front()))
+ return false;
+
+ // If we have a xor as the branch input to this block, and we know that the
+ // LHS or RHS of the xor in any predecessor is true/false, then we can clone
+ // the condition into the predecessor and fix that value to true, saving some
+ // logical ops on that path and encouraging other paths to simplify.
+ //
+ // This copies something like this:
+ //
+ // BB:
+ // %X = phi i1 [1], [%X']
+ // %Y = icmp eq i32 %A, %B
+ // %Z = xor i1 %X, %Y
+ // br i1 %Z, ...
+ //
+ // Into:
+ // BB':
+ // %Y = icmp ne i32 %A, %B
+ // br i1 %Z, ...
+
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> XorOpValues;
+ bool isLHS = true;
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) {
+ assert(XorOpValues.empty());
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues))
+ return false;
+ isLHS = false;
+ }
+
+ assert(!XorOpValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ // Scan the information to see which is most popular: true or false. The
+ // predecessors can be of the set true, false, or undef.
+ unsigned NumTrue = 0, NumFalse = 0;
+ for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+ if (!XorOpValues[i].first) continue; // Ignore undefs for the count.
+ if (XorOpValues[i].first->isZero())
+ ++NumFalse;
+ else
+ ++NumTrue;
+ }
+
+ // Determine which value to split on, true, false, or undef if neither.
+ ConstantInt *SplitVal = 0;
+ if (NumTrue > NumFalse)
+ SplitVal = ConstantInt::getTrue(BB->getContext());
+ else if (NumTrue != 0 || NumFalse != 0)
+ SplitVal = ConstantInt::getFalse(BB->getContext());
+
+ // Collect all of the blocks that this can be folded into so that we can
+ // factor this once and clone it once.
+ SmallVector<BasicBlock*, 8> BlocksToFoldInto;
+ for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+ if (XorOpValues[i].first != SplitVal && XorOpValues[i].first != 0) continue;
+
+ BlocksToFoldInto.push_back(XorOpValues[i].second);
+ }
+
+ // If we inferred a value for all of the predecessors, then duplication won't
+ // help us. However, we can just replace the LHS or RHS with the constant.
+ if (BlocksToFoldInto.size() ==
+ cast<PHINode>(BB->front()).getNumIncomingValues()) {
+ if (SplitVal == 0) {
+ // If all preds provide undef, just nuke the xor, because it is undef too.
+ BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
+ BO->eraseFromParent();
+ } else if (SplitVal->isZero()) {
+ // If all preds provide 0, replace the xor with the other input.
+ BO->replaceAllUsesWith(BO->getOperand(isLHS));
+ BO->eraseFromParent();
+ } else {
+ // If all preds provide 1, set the computed value to 1.
+ BO->setOperand(!isLHS, SplitVal);
+ }
+
+ return true;
+ }
+
+ // Try to duplicate BB into PredBB.
+ return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
+}
+
+
+/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
+/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
+/// NewPred using the entries from OldPred (suitably mapped).
+static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
+ BasicBlock *OldPred,
+ BasicBlock *NewPred,
+ DenseMap<Instruction*, Value*> &ValueMap) {
+ for (BasicBlock::iterator PNI = PHIBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
+ // Ok, we have a PHI node. Figure out what the incoming value was for the
+ // DestBlock.
+ Value *IV = PN->getIncomingValueForBlock(OldPred);
+
+ // Remap the value if necessary.
+ if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
+ DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
+ if (I != ValueMap.end())
+ IV = I->second;
+ }
+
+ PN->addIncoming(IV, NewPred);
+ }
+}
+
+/// ThreadEdge - We have decided that it is safe and profitable to factor the
+/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
+/// across BB. Transform the IR to reflect this change.
+bool JumpThreading::ThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &PredBBs,
+ BasicBlock *SuccBB) {
+ // If threading to the same block as we come from, we would infinite loop.
+ if (SuccBB == BB) {
+ DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
+ << "' - would thread to self!\n");
+ return false;
+ }
+
+ // If threading this would thread across a loop header, don't thread the edge.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB)) {
+ DEBUG(dbgs() << " Not threading across loop header BB '" << BB->getName()
+ << "' to dest BB '" << SuccBB->getName()
+ << "' - it might create an irreducible loop!\n");
+ return false;
+ }
+
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+ if (JumpThreadCost > Threshold) {
+ DEBUG(dbgs() << " Not threading BB '" << BB->getName()
+ << "' - Cost is too high: " << JumpThreadCost << "\n");
+ return false;
+ }
+
+ // And finally, do it! Start by factoring the predecessors is needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(dbgs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
+ ".thr_comm", this);
+ }
+
+ // And finally, do it!
+ DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '"
+ << SuccBB->getName() << "' with cost: " << JumpThreadCost
+ << ", across block:\n "
+ << *BB << "\n");
+
+ if (LVI)
+ LVI->threadEdge(PredBB, BB, SuccBB);
+
+ // We are going to have to map operands from the original BB block to the new
+ // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
+ // account for entry from PredBB.
+ DenseMap<Instruction*, Value*> ValueMapping;
+
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
+ BB->getName()+".thread",
+ BB->getParent(), BB);
+ NewBB->moveAfter(PredBB);
+
+ BasicBlock::iterator BI = BB->begin();
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+
+ // Clone the non-phi instructions of BB into NewBB, keeping track of the
+ // mapping and using it to remap operands in the cloned instructions.
+ for (; !isa<TerminatorInst>(BI); ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ NewBB->getInstList().push_back(New);
+ ValueMapping[BI] = New;
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+ }
+
+ // We didn't copy the terminator from BB over to NewBB, because there is now
+ // an unconditional jump to SuccBB. Insert the unconditional jump.
+ BranchInst::Create(SuccBB, NewBB);
+
+ // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
+ // PHI nodes for NewBB now.
+ AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
+
+ // If there were values defined in BB that are used outside the block, then we
+ // now have to update all uses of the value to use either the original value,
+ // the cloned value, or some PHI derived value. This can require arbitrary
+ // PHI insertion, of which we are prepared to do, clean these up now.
+ SSAUpdater SSAUpdate;
+ SmallVector<Use*, 16> UsesToRename;
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ // Scan all uses of this instruction to see if it is used outside of its
+ // block, and if so, record them in UsesToRename.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(UI) == BB)
+ continue;
+ } else if (User->getParent() == BB)
+ continue;
+
+ UsesToRename.push_back(&UI.getUse());
+ }
+
+ // If there are no uses outside the block, we're done with this instruction.
+ if (UsesToRename.empty())
+ continue;
+
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+
+ // We found a use of I outside of BB. Rename all uses of I that are outside
+ // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
+ // with the two values we know.
+ SSAUpdate.Initialize(I->getType(), I->getName());
+ SSAUpdate.AddAvailableValue(BB, I);
+ SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
+
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+ DEBUG(dbgs() << "\n");
+ }
+
+
+ // Ok, NewBB is good to go. Update the terminator of PredBB to jump to
+ // NewBB instead of BB. This eliminates predecessors from BB, which requires
+ // us to simplify any PHI nodes in BB.
+ TerminatorInst *PredTerm = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
+ if (PredTerm->getSuccessor(i) == BB) {
+ RemovePredecessorAndSimplify(BB, PredBB, TD);
+ PredTerm->setSuccessor(i, NewBB);
+ }
+
+ // At this point, the IR is fully up to date and consistent. Do a quick scan
+ // over the new instructions and zap any that are constants or dead. This
+ // frequently happens because of phi translation.
+ SimplifyInstructionsInBlock(NewBB, TD);
+
+ // Threaded an edge!
+ ++NumThreads;
+ return true;
+}
+
+/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
+/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
+/// If we can duplicate the contents of BB up into PredBB do so now, this
+/// improves the odds that the branch will be on an analyzable instruction like
+/// a compare.
+bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &PredBBs) {
+ assert(!PredBBs.empty() && "Can't handle an empty set");
+
+ // If BB is a loop header, then duplicating this block outside the loop would
+ // cause us to transform this into an irreducible loop, don't do this.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB)) {
+ DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
+ << "' into predecessor block '" << PredBBs[0]->getName()
+ << "' - it might create an irreducible loop!\n");
+ return false;
+ }
+
+ unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
+ if (DuplicationCost > Threshold) {
+ DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
+ << "' - Cost is too high: " << DuplicationCost << "\n");
+ return false;
+ }
+
+ // And finally, do it! Start by factoring the predecessors is needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(dbgs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
+ ".thr_comm", this);
+ }
+
+ // Okay, we decided to do this! Clone all the instructions in BB onto the end
+ // of PredBB.
+ DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
+ << PredBB->getName() << "' to eliminate branch on phi. Cost: "
+ << DuplicationCost << " block is:" << *BB << "\n");
+
+ // Unless PredBB ends with an unconditional branch, split the edge so that we
+ // can just clone the bits from BB into the end of the new PredBB.
+ BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
+
+ if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
+ PredBB = SplitEdge(PredBB, BB, this);
+ OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
+ }
+
+ // We are going to have to map operands from the original BB block into the
+ // PredBB block. Evaluate PHI nodes in BB.
+ DenseMap<Instruction*, Value*> ValueMapping;
+
+ BasicBlock::iterator BI = BB->begin();
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+
+ // Clone the non-phi instructions of BB into PredBB, keeping track of the
+ // mapping and using it to remap operands in the cloned instructions.
+ for (; BI != BB->end(); ++BI) {
+ Instruction *New = BI->clone();
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+
+ // If this instruction can be simplified after the operands are updated,
+ // just use the simplified value instead. This frequently happens due to
+ // phi translation.
+ if (Value *IV = SimplifyInstruction(New, TD)) {
+ delete New;
+ ValueMapping[BI] = IV;
+ } else {
+ // Otherwise, insert the new instruction into the block.
+ New->setName(BI->getName());
+ PredBB->getInstList().insert(OldPredBranch, New);
+ ValueMapping[BI] = New;
+ }
+ }
+
+ // Check to see if the targets of the branch had PHI nodes. If so, we need to
+ // add entries to the PHI nodes for branch from PredBB now.
+ BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
+ AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
+ ValueMapping);
+ AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
+ ValueMapping);
+
+ // If there were values defined in BB that are used outside the block, then we
+ // now have to update all uses of the value to use either the original value,
+ // the cloned value, or some PHI derived value. This can require arbitrary
+ // PHI insertion, of which we are prepared to do, clean these up now.
+ SSAUpdater SSAUpdate;
+ SmallVector<Use*, 16> UsesToRename;
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ // Scan all uses of this instruction to see if it is used outside of its
+ // block, and if so, record them in UsesToRename.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(UI) == BB)
+ continue;
+ } else if (User->getParent() == BB)
+ continue;
+
+ UsesToRename.push_back(&UI.getUse());
+ }
+
+ // If there are no uses outside the block, we're done with this instruction.
+ if (UsesToRename.empty())
+ continue;
+
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+
+ // We found a use of I outside of BB. Rename all uses of I that are outside
+ // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
+ // with the two values we know.
+ SSAUpdate.Initialize(I->getType(), I->getName());
+ SSAUpdate.AddAvailableValue(BB, I);
+ SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
+
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+ DEBUG(dbgs() << "\n");
+ }
+
+ // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
+ // that we nuked.
+ RemovePredecessorAndSimplify(BB, PredBB, TD);
+
+ // Remove the unconditional branch at the end of the PredBB block.
+ OldPredBranch->eraseFromParent();
+
+ ++NumDupes;
+ return true;
+}
+
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
new file mode 100644
index 0000000..2ef8544
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -0,0 +1,917 @@
+//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion, attempting to remove as much
+// code from the body of a loop as possible. It does this by either hoisting
+// code into the preheader block, or by sinking code to the exit blocks if it is
+// safe. This pass also promotes must-aliased memory locations in the loop to
+// live in registers, thus hoisting and sinking "invariant" loads and stores.
+//
+// This pass uses alias analysis for two purposes:
+//
+// 1. Moving loop invariant loads and calls out of loops. If we can determine
+// that a load or call inside of a loop never aliases anything stored to,
+// we can hoist it or sink it like any other instruction.
+// 2. Scalar Promotion of Memory - If there is a store instruction inside of
+// the loop, we try to move the store to happen AFTER the loop instead of
+// inside of the loop. This can only happen if a few conditions are true:
+// A. The pointer stored through is loop invariant
+// B. There are no stores or loads in the loop which _may_ alias the
+// pointer. There are no calls in the loop which mod/ref the pointer.
+// If these conditions are true, we can promote the loads and stores in the
+// loop of the pointer to use a temporary alloca'd variable. We then use
+// the SSAUpdater to construct the appropriate SSA form for the value.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "licm"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSunk , "Number of instructions sunk out of loop");
+STATISTIC(NumHoisted , "Number of instructions hoisted out of loop");
+STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
+STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
+STATISTIC(NumPromoted , "Number of memory locations promoted to registers");
+
+static cl::opt<bool>
+DisablePromotion("disable-licm-promotion", cl::Hidden,
+ cl::desc("Disable memory promotion in LICM pass"));
+
+namespace {
+ struct LICM : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LICM() : LoopPass(ID) {}
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreservedID(LoopSimplifyID);
+ }
+
+ bool doFinalization() {
+ assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
+ return false;
+ }
+
+ private:
+ AliasAnalysis *AA; // Current AliasAnalysis information
+ LoopInfo *LI; // Current LoopInfo
+ DominatorTree *DT; // Dominator Tree for the current Loop.
+
+ // State that is updated as we process loops.
+ bool Changed; // Set to true when we change anything.
+ BasicBlock *Preheader; // The preheader block of the current loop...
+ Loop *CurLoop; // The current loop we are working on...
+ AliasSetTracker *CurAST; // AliasSet information for the current loop...
+ DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
+
+ /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+ void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L);
+
+ /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
+ /// set.
+ void deleteAnalysisValue(Value *V, Loop *L);
+
+ /// SinkRegion - Walk the specified region of the CFG (defined by all blocks
+ /// dominated by the specified block, and that are in the current loop) in
+ /// reverse depth first order w.r.t the DominatorTree. This allows us to
+ /// visit uses before definitions, allowing us to sink a loop body in one
+ /// pass without iteration.
+ ///
+ void SinkRegion(DomTreeNode *N);
+
+ /// HoistRegion - Walk the specified region of the CFG (defined by all
+ /// blocks dominated by the specified block, and that are in the current
+ /// loop) in depth first order w.r.t the DominatorTree. This allows us to
+ /// visit definitions before uses, allowing us to hoist a loop body in one
+ /// pass without iteration.
+ ///
+ void HoistRegion(DomTreeNode *N);
+
+ /// inSubLoop - Little predicate that returns true if the specified basic
+ /// block is in a subloop of the current one, not the current one itself.
+ ///
+ bool inSubLoop(BasicBlock *BB) {
+ assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
+ for (Loop::iterator I = CurLoop->begin(), E = CurLoop->end(); I != E; ++I)
+ if ((*I)->contains(BB))
+ return true; // A subloop actually contains this block!
+ return false;
+ }
+
+ /// isExitBlockDominatedByBlockInLoop - This method checks to see if the
+ /// specified exit block of the loop is dominated by the specified block
+ /// that is in the body of the loop. We use these constraints to
+ /// dramatically limit the amount of the dominator tree that needs to be
+ /// searched.
+ bool isExitBlockDominatedByBlockInLoop(BasicBlock *ExitBlock,
+ BasicBlock *BlockInLoop) const {
+ // If the block in the loop is the loop header, it must be dominated!
+ BasicBlock *LoopHeader = CurLoop->getHeader();
+ if (BlockInLoop == LoopHeader)
+ return true;
+
+ DomTreeNode *BlockInLoopNode = DT->getNode(BlockInLoop);
+ DomTreeNode *IDom = DT->getNode(ExitBlock);
+
+ // Because the exit block is not in the loop, we know we have to get _at
+ // least_ its immediate dominator.
+ IDom = IDom->getIDom();
+
+ while (IDom && IDom != BlockInLoopNode) {
+ // If we have got to the header of the loop, then the instructions block
+ // did not dominate the exit node, so we can't hoist it.
+ if (IDom->getBlock() == LoopHeader)
+ return false;
+
+ // Get next Immediate Dominator.
+ IDom = IDom->getIDom();
+ };
+
+ return true;
+ }
+
+ /// sink - When an instruction is found to only be used outside of the loop,
+ /// this function moves it to the exit blocks and patches up SSA form as
+ /// needed.
+ ///
+ void sink(Instruction &I);
+
+ /// hoist - When an instruction is found to only use loop invariant operands
+ /// that is safe to hoist, this instruction is called to do the dirty work.
+ ///
+ void hoist(Instruction &I);
+
+ /// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it
+ /// is not a trapping instruction or if it is a trapping instruction and is
+ /// guaranteed to execute.
+ ///
+ bool isSafeToExecuteUnconditionally(Instruction &I);
+
+ /// pointerInvalidatedByLoop - Return true if the body of this loop may
+ /// store into the memory location pointed to by V.
+ ///
+ bool pointerInvalidatedByLoop(Value *V, unsigned Size) {
+ // Check to see if any of the basic blocks in CurLoop invalidate *V.
+ return CurAST->getAliasSetForPointer(V, Size).isMod();
+ }
+
+ bool canSinkOrHoistInst(Instruction &I);
+ bool isLoopInvariantInst(Instruction &I);
+ bool isNotUsedInLoop(Instruction &I);
+
+ void PromoteAliasSet(AliasSet &AS);
+ };
+}
+
+char LICM::ID = 0;
+INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false);
+
+Pass *llvm::createLICMPass() { return new LICM(); }
+
+/// Hoist expressions out of the specified loop. Note, alias info for inner
+/// loop is not preserved so it is not a good idea to run LICM multiple
+/// times on one loop.
+///
+bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
+ Changed = false;
+
+ // Get our Loop and Alias Analysis information...
+ LI = &getAnalysis<LoopInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+
+ CurAST = new AliasSetTracker(*AA);
+ // Collect Alias info from subloops.
+ for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
+ LoopItr != LoopItrE; ++LoopItr) {
+ Loop *InnerL = *LoopItr;
+ AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
+ assert(InnerAST && "Where is my AST?");
+
+ // What if InnerLoop was modified by other passes ?
+ CurAST->add(*InnerAST);
+
+ // Once we've incorporated the inner loop's AST into ours, we don't need the
+ // subloop's anymore.
+ delete InnerAST;
+ LoopToAliasSetMap.erase(InnerL);
+ }
+
+ CurLoop = L;
+
+ // Get the preheader block to move instructions into...
+ Preheader = L->getLoopPreheader();
+
+ // Loop over the body of this loop, looking for calls, invokes, and stores.
+ // Because subloops have already been incorporated into AST, we skip blocks in
+ // subloops.
+ //
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
+ CurAST->add(*BB); // Incorporate the specified basic block
+ }
+
+ // We want to visit all of the instructions in this loop... that are not parts
+ // of our subloops (they have already had their invariants hoisted out of
+ // their loop, into this loop, so there is no need to process the BODIES of
+ // the subloops).
+ //
+ // Traverse the body of the loop in depth first order on the dominator tree so
+ // that we are guaranteed to see definitions before we see uses. This allows
+ // us to sink instructions in one pass, without iteration. After sinking
+ // instructions, we perform another pass to hoist them out of the loop.
+ //
+ if (L->hasDedicatedExits())
+ SinkRegion(DT->getNode(L->getHeader()));
+ if (Preheader)
+ HoistRegion(DT->getNode(L->getHeader()));
+
+ // Now that all loop invariants have been removed from the loop, promote any
+ // memory references to scalars that we can.
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ // Loop over all of the alias sets in the tracker object.
+ for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+ I != E; ++I)
+ PromoteAliasSet(*I);
+ }
+
+ // Clear out loops state information for the next iteration
+ CurLoop = 0;
+ Preheader = 0;
+
+ // If this loop is nested inside of another one, save the alias information
+ // for when we process the outer loop.
+ if (L->getParentLoop())
+ LoopToAliasSetMap[L] = CurAST;
+ else
+ delete CurAST;
+ return Changed;
+}
+
+/// SinkRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in
+/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
+/// uses before definitions, allowing us to sink a loop body in one pass without
+/// iteration.
+///
+void LICM::SinkRegion(DomTreeNode *N) {
+ assert(N != 0 && "Null dominator tree node?");
+ BasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ // We are processing blocks in reverse dfo, so process children first.
+ const std::vector<DomTreeNode*> &Children = N->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ SinkRegion(Children[i]);
+
+ // Only need to process the contents of this block if it is not part of a
+ // subloop (which would already have been processed).
+ if (inSubLoop(BB)) return;
+
+ for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
+ Instruction &I = *--II;
+
+ // If the instruction is dead, we would try to sink it because it isn't used
+ // in the loop, instead, just delete it.
+ if (isInstructionTriviallyDead(&I)) {
+ DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
+ ++II;
+ CurAST->deleteValue(&I);
+ I.eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // Check to see if we can sink this instruction to the exit blocks
+ // of the loop. We can do this if the all users of the instruction are
+ // outside of the loop. In this case, it doesn't even matter if the
+ // operands of the instruction are loop invariant.
+ //
+ if (isNotUsedInLoop(I) && canSinkOrHoistInst(I)) {
+ ++II;
+ sink(I);
+ }
+ }
+}
+
+/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in depth
+/// first order w.r.t the DominatorTree. This allows us to visit definitions
+/// before uses, allowing us to hoist a loop body in one pass without iteration.
+///
+void LICM::HoistRegion(DomTreeNode *N) {
+ assert(N != 0 && "Null dominator tree node?");
+ BasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ // Only need to process the contents of this block if it is not part of a
+ // subloop (which would already have been processed).
+ if (!inSubLoop(BB))
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
+ Instruction &I = *II++;
+
+ // Try constant folding this instruction. If all the operands are
+ // constants, it is technically hoistable, but it would be better to just
+ // fold it.
+ if (Constant *C = ConstantFoldInstruction(&I)) {
+ DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
+ CurAST->copyValue(&I, C);
+ CurAST->deleteValue(&I);
+ I.replaceAllUsesWith(C);
+ I.eraseFromParent();
+ continue;
+ }
+
+ // Try hoisting the instruction out to the preheader. We can only do this
+ // if all of the operands of the instruction are loop invariant and if it
+ // is safe to hoist the instruction.
+ //
+ if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) &&
+ isSafeToExecuteUnconditionally(I))
+ hoist(I);
+ }
+
+ const std::vector<DomTreeNode*> &Children = N->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ HoistRegion(Children[i]);
+}
+
+/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this
+/// instruction.
+///
+bool LICM::canSinkOrHoistInst(Instruction &I) {
+ // Loads have extra constraints we have to verify before we can hoist them.
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (LI->isVolatile())
+ return false; // Don't hoist volatile loads!
+
+ // Loads from constant memory are always safe to move, even if they end up
+ // in the same alias set as something that ends up being modified.
+ if (AA->pointsToConstantMemory(LI->getOperand(0)))
+ return true;
+
+ // Don't hoist loads which have may-aliased stores in loop.
+ unsigned Size = 0;
+ if (LI->getType()->isSized())
+ Size = AA->getTypeStoreSize(LI->getType());
+ return !pointerInvalidatedByLoop(LI->getOperand(0), Size);
+ } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ // Handle obvious cases efficiently.
+ AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
+ if (Behavior == AliasAnalysis::DoesNotAccessMemory)
+ return true;
+ else if (Behavior == AliasAnalysis::OnlyReadsMemory) {
+ // If this call only reads from memory and there are no writes to memory
+ // in the loop, we can hoist or sink the call as appropriate.
+ bool FoundMod = false;
+ for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+ I != E; ++I) {
+ AliasSet &AS = *I;
+ if (!AS.isForwardingAliasSet() && AS.isMod()) {
+ FoundMod = true;
+ break;
+ }
+ }
+ if (!FoundMod) return true;
+ }
+
+ // FIXME: This should use mod/ref information to see if we can hoist or sink
+ // the call.
+
+ return false;
+ }
+
+ // Otherwise these instructions are hoistable/sinkable
+ return isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
+ isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I);
+}
+
+/// isNotUsedInLoop - Return true if the only users of this instruction are
+/// outside of the loop. If this is true, we can sink the instruction to the
+/// exit blocks of the loop.
+///
+bool LICM::isNotUsedInLoop(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ // PHI node uses occur in predecessor blocks!
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &I)
+ if (CurLoop->contains(PN->getIncomingBlock(i)))
+ return false;
+ } else if (CurLoop->contains(User)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+
+/// isLoopInvariantInst - Return true if all operands of this instruction are
+/// loop invariant. We also filter out non-hoistable instructions here just for
+/// efficiency.
+///
+bool LICM::isLoopInvariantInst(Instruction &I) {
+ // The instruction is loop invariant if all of its operands are loop-invariant
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ if (!CurLoop->isLoopInvariant(I.getOperand(i)))
+ return false;
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
+
+/// sink - When an instruction is found to only be used outside of the loop,
+/// this function moves it to the exit blocks and patches up SSA form as needed.
+/// This method is guaranteed to remove the original instruction from its
+/// position, and may either delete it or move it to outside of the loop.
+///
+void LICM::sink(Instruction &I) {
+ DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
+ if (isa<LoadInst>(I)) ++NumMovedLoads;
+ else if (isa<CallInst>(I)) ++NumMovedCalls;
+ ++NumSunk;
+ Changed = true;
+
+ // The case where there is only a single exit node of this loop is common
+ // enough that we handle it as a special (more efficient) case. It is more
+ // efficient to handle because there are no PHI nodes that need to be placed.
+ if (ExitBlocks.size() == 1) {
+ if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) {
+ // Instruction is not used, just delete it.
+ CurAST->deleteValue(&I);
+ // If I has users in unreachable blocks, eliminate.
+ // If I is not void type then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust itself.
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ } else {
+ // Move the instruction to the start of the exit block, after any PHI
+ // nodes in it.
+ I.moveBefore(ExitBlocks[0]->getFirstNonPHI());
+
+ // This instruction is no longer in the AST for the current loop, because
+ // we just sunk it out of the loop. If we just sunk it into an outer
+ // loop, we will rediscover the operation when we process it.
+ CurAST->deleteValue(&I);
+ }
+ return;
+ }
+
+ if (ExitBlocks.empty()) {
+ // The instruction is actually dead if there ARE NO exit blocks.
+ CurAST->deleteValue(&I);
+ // If I has users in unreachable blocks, eliminate.
+ // If I is not void type then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust itself.
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ return;
+ }
+
+ // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
+ // hard work of inserting PHI nodes as necessary.
+ SmallVector<PHINode*, 8> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+
+ if (!I.use_empty())
+ SSA.Initialize(I.getType(), I.getName());
+
+ // Insert a copy of the instruction in each exit block of the loop that is
+ // dominated by the instruction. Each exit block is known to only be in the
+ // ExitBlocks list once.
+ BasicBlock *InstOrigBB = I.getParent();
+ unsigned NumInserted = 0;
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+
+ if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB))
+ continue;
+
+ // Insert the code after the last PHI node.
+ BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
+
+ // If this is the first exit block processed, just move the original
+ // instruction, otherwise clone the original instruction and insert
+ // the copy.
+ Instruction *New;
+ if (NumInserted++ == 0) {
+ I.moveBefore(InsertPt);
+ New = &I;
+ } else {
+ New = I.clone();
+ if (!I.getName().empty())
+ New->setName(I.getName()+".le");
+ ExitBlock->getInstList().insert(InsertPt, New);
+ }
+
+ // Now that we have inserted the instruction, inform SSAUpdater.
+ if (!I.use_empty())
+ SSA.AddAvailableValue(ExitBlock, New);
+ }
+
+ // If the instruction doesn't dominate any exit blocks, it must be dead.
+ if (NumInserted == 0) {
+ CurAST->deleteValue(&I);
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ return;
+ }
+
+ // Next, rewrite uses of the instruction, inserting PHI nodes as needed.
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+ SSA.RewriteUseAfterInsertions(U);
+ }
+
+ // Update CurAST for NewPHIs if I had pointer type.
+ if (I.getType()->isPointerTy())
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ CurAST->copyValue(&I, NewPHIs[i]);
+
+ // Finally, remove the instruction from CurAST. It is no longer in the loop.
+ CurAST->deleteValue(&I);
+}
+
+/// hoist - When an instruction is found to only use loop invariant operands
+/// that is safe to hoist, this instruction is called to do the dirty work.
+///
+void LICM::hoist(Instruction &I) {
+ DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
+ << I << "\n");
+
+ // Move the new node to the Preheader, before its terminator.
+ I.moveBefore(Preheader->getTerminator());
+
+ if (isa<LoadInst>(I)) ++NumMovedLoads;
+ else if (isa<CallInst>(I)) ++NumMovedCalls;
+ ++NumHoisted;
+ Changed = true;
+}
+
+/// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it is
+/// not a trapping instruction or if it is a trapping instruction and is
+/// guaranteed to execute.
+///
+bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
+ // If it is not a trapping instruction, it is always safe to hoist.
+ if (Inst.isSafeToSpeculativelyExecute())
+ return true;
+
+ // Otherwise we have to check to make sure that the instruction dominates all
+ // of the exit blocks. If it doesn't, then there is a path out of the loop
+ // which does not execute this instruction, so we can't hoist it.
+
+ // If the instruction is in the header block for the loop (which is very
+ // common), it is always guaranteed to dominate the exit blocks. Since this
+ // is a common case, and can save some work, check it now.
+ if (Inst.getParent() == CurLoop->getHeader())
+ return true;
+
+ // Get the exit blocks for the current loop.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getExitBlocks(ExitBlocks);
+
+ // For each exit block, get the DT node and walk up the DT until the
+ // instruction's basic block is found or we exit the loop.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[i], Inst.getParent()))
+ return false;
+
+ return true;
+}
+
+/// PromoteAliasSet - Try to promote memory values to scalars by sinking
+/// stores out of the loop and moving loads to before the loop. We do this by
+/// looping over the stores in the loop, looking for stores to Must pointers
+/// which are loop invariant.
+///
+void LICM::PromoteAliasSet(AliasSet &AS) {
+ // We can promote this alias set if it has a store, if it is a "Must" alias
+ // set, if the pointer is loop invariant, and if we are not eliminating any
+ // volatile loads or stores.
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+ AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
+ return;
+
+ assert(!AS.empty() &&
+ "Must alias set should have at least one pointer element in it!");
+ Value *SomePtr = AS.begin()->getValue();
+
+ // It isn't safe to promote a load/store from the loop if the load/store is
+ // conditional. For example, turning:
+ //
+ // for () { if (c) *P += 1; }
+ //
+ // into:
+ //
+ // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
+ //
+ // is not safe, because *P may only be valid to access if 'c' is true.
+ //
+ // It is safe to promote P if all uses are direct load/stores and if at
+ // least one is guaranteed to be executed.
+ bool GuaranteedToExecute = false;
+
+ SmallVector<Instruction*, 64> LoopUses;
+ SmallPtrSet<Value*, 4> PointerMustAliases;
+
+ // Check that all of the pointers in the alias set have the same type. We
+ // cannot (yet) promote a memory location that is loaded and stored in
+ // different sizes.
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ Value *ASIV = ASI->getValue();
+ PointerMustAliases.insert(ASIV);
+
+ // Check that all of the pointers in the alias set have the same type. We
+ // cannot (yet) promote a memory location that is loaded and stored in
+ // different sizes.
+ if (SomePtr->getType() != ASIV->getType())
+ return;
+
+ for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
+ UI != UE; ++UI) {
+ // Ignore instructions that are outside the loop.
+ Instruction *Use = dyn_cast<Instruction>(*UI);
+ if (!Use || !CurLoop->contains(Use))
+ continue;
+
+ // If there is an non-load/store instruction in the loop, we can't promote
+ // it.
+ if (isa<LoadInst>(Use))
+ assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
+ else if (isa<StoreInst>(Use)) {
+ assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
+ if (Use->getOperand(0) == ASIV) return;
+ } else
+ return; // Not a load or store.
+
+ if (!GuaranteedToExecute)
+ GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
+
+ LoopUses.push_back(Use);
+ }
+ }
+
+ // If there isn't a guaranteed-to-execute instruction, we can't promote.
+ if (!GuaranteedToExecute)
+ return;
+
+ // Otherwise, this is safe to promote, lets do it!
+ DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
+ Changed = true;
+ ++NumPromoted;
+
+ // We use the SSAUpdater interface to insert phi nodes as required.
+ SmallVector<PHINode*, 16> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+
+ // It wants to know some value of the same type as what we'll be inserting.
+ Value *SomeValue;
+ if (isa<LoadInst>(LoopUses[0]))
+ SomeValue = LoopUses[0];
+ else
+ SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
+ SSA.Initialize(SomeValue->getType(), SomeValue->getName());
+
+ // First step: bucket up uses of the pointers by the block they occur in.
+ // This is important because we have to handle multiple defs/uses in a block
+ // ourselves: SSAUpdater is purely for cross-block references.
+ // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
+ DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
+ for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
+ Instruction *User = LoopUses[i];
+ UsesByBlock[User->getParent()].push_back(User);
+ }
+
+ // Okay, now we can iterate over all the blocks in the loop with uses,
+ // processing them. Keep track of which loads are loading a live-in value.
+ SmallVector<LoadInst*, 32> LiveInLoads;
+ DenseMap<Value*, Value*> ReplacedLoads;
+
+ for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
+ Instruction *User = LoopUses[LoopUse];
+ std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];
+
+ // If this block has already been processed, ignore this repeat use.
+ if (BlockUses.empty()) continue;
+
+ // Okay, this is the first use in the block. If this block just has a
+ // single user in it, we can rewrite it trivially.
+ if (BlockUses.size() == 1) {
+ // If it is a store, it is a trivial def of the value in the block.
+ if (isa<StoreInst>(User)) {
+ SSA.AddAvailableValue(User->getParent(),
+ cast<StoreInst>(User)->getOperand(0));
+ } else {
+ // Otherwise it is a load, queue it to rewrite as a live-in load.
+ LiveInLoads.push_back(cast<LoadInst>(User));
+ }
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, check to see if this block is all loads. If so, we can queue
+ // them all as live in loads.
+ bool HasStore = false;
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
+ if (isa<StoreInst>(BlockUses[i])) {
+ HasStore = true;
+ break;
+ }
+ }
+
+ if (!HasStore) {
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+ LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+ // Since SSAUpdater is purely for cross-block values, we need to determine
+ // the order of these instructions in the block. If the first use in the
+ // block is a load, then it uses the live in value. The last store defines
+ // the live out value. We handle this by doing a linear scan of the block.
+ BasicBlock *BB = User->getParent();
+ Value *StoredValue = 0;
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ // If this is a load from an unrelated pointer, ignore it.
+ if (!PointerMustAliases.count(L->getOperand(0))) continue;
+
+ // If we haven't seen a store yet, this is a live in use, otherwise
+ // use the stored value.
+ if (StoredValue) {
+ L->replaceAllUsesWith(StoredValue);
+ ReplacedLoads[L] = StoredValue;
+ } else {
+ LiveInLoads.push_back(L);
+ }
+ continue;
+ }
+
+ if (StoreInst *S = dyn_cast<StoreInst>(II)) {
+ // If this is a store to an unrelated pointer, ignore it.
+ if (!PointerMustAliases.count(S->getOperand(1))) continue;
+
+ // Remember that this is the active value in the block.
+ StoredValue = S->getOperand(0);
+ }
+ }
+
+ // The last stored value that happened is the live-out for the block.
+ assert(StoredValue && "Already checked that there is a store in block");
+ SSA.AddAvailableValue(BB, StoredValue);
+ BlockUses.clear();
+ }
+
+ // Now that all the intra-loop values are classified, set up the preheader.
+ // It gets a load of the pointer we're promoting, and it is the live-out value
+ // from the preheader.
+ LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted",
+ Preheader->getTerminator());
+ SSA.AddAvailableValue(Preheader, PreheaderLoad);
+
+ // Now that the preheader is good to go, set up the exit blocks. Each exit
+ // block gets a store of the live-out values that feed them. Since we've
+ // already told the SSA updater about the defs in the loop and the preheader
+ // definition, it is all set and we can start using it.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+ new StoreInst(LiveInValue, SomePtr, InsertPos);
+ }
+
+ // Okay, now we rewrite all loads that use live-in values in the loop,
+ // inserting PHI nodes as necessary.
+ for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
+ LoadInst *ALoad = LiveInLoads[i];
+ Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+ ALoad->replaceAllUsesWith(NewVal);
+ CurAST->copyValue(ALoad, NewVal);
+ ReplacedLoads[ALoad] = NewVal;
+ }
+
+ // If the preheader load is itself a pointer, we need to tell alias analysis
+ // about the new pointer we created in the preheader block and about any PHI
+ // nodes that just got inserted.
+ if (PreheaderLoad->getType()->isPointerTy()) {
+ // Copy any value stored to or loaded from a must-alias of the pointer.
+ CurAST->copyValue(SomeValue, PreheaderLoad);
+
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ CurAST->copyValue(SomeValue, NewPHIs[i]);
+ }
+
+ // Now that everything is rewritten, delete the old instructions from the body
+ // of the loop. They should all be dead now.
+ for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
+ Instruction *User = LoopUses[i];
+
+ // If this is a load that still has uses, then the load must have been added
+ // as a live value in the SSAUpdate data structure for a block (e.g. because
+ // the loaded value was stored later). In this case, we need to recursively
+ // propagate the updates until we get to the real value.
+ if (!User->use_empty()) {
+ Value *NewVal = ReplacedLoads[User];
+ assert(NewVal && "not a replaced load?");
+
+ // Propagate down to the ultimate replacee. The intermediately loads
+ // could theoretically already have been deleted, so we don't want to
+ // dereference the Value*'s.
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+ while (RLI != ReplacedLoads.end()) {
+ NewVal = RLI->second;
+ RLI = ReplacedLoads.find(NewVal);
+ }
+
+ User->replaceAllUsesWith(NewVal);
+ CurAST->copyValue(User, NewVal);
+ }
+
+ CurAST->deleteValue(User);
+ User->eraseFromParent();
+ }
+
+ // fwew, we're done!
+}
+
+
+/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
+ if (!AST)
+ return;
+
+ AST->copyValue(From, To);
+}
+
+/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
+/// set.
+void LICM::deleteAnalysisValue(Value *V, Loop *L) {
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
+ if (!AST)
+ return;
+
+ AST->deleteValue(V);
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
new file mode 100644
index 0000000..543dfc1
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -0,0 +1,233 @@
+//===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Dead Loop Deletion Pass. This pass is responsible
+// for eliminating loops with non-infinite computable trip counts that have no
+// side effects or volatile instructions, and do not contribute to the
+// computation of the function's return value.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-delete"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+STATISTIC(NumDeleted, "Number of loops deleted");
+
+namespace {
+ class LoopDeletion : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopDeletion() : LoopPass(ID) {}
+
+ // Possibly eliminate loop L if it is dead.
+ bool runOnLoop(Loop* L, LPPassManager& LPM);
+
+ bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
+ SmallVector<BasicBlock*, 4>& exitBlocks,
+ bool &Changed, BasicBlock *Preheader);
+
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<DominanceFrontier>();
+ }
+ };
+}
+
+char LoopDeletion::ID = 0;
+INITIALIZE_PASS(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false);
+
+Pass* llvm::createLoopDeletionPass() {
+ return new LoopDeletion();
+}
+
+/// IsLoopDead - Determined if a loop is dead. This assumes that we've already
+/// checked for unique exit and exiting blocks, and that the code is in LCSSA
+/// form.
+bool LoopDeletion::IsLoopDead(Loop* L,
+ SmallVector<BasicBlock*, 4>& exitingBlocks,
+ SmallVector<BasicBlock*, 4>& exitBlocks,
+ bool &Changed, BasicBlock *Preheader) {
+ BasicBlock* exitingBlock = exitingBlocks[0];
+ BasicBlock* exitBlock = exitBlocks[0];
+
+ // Make sure that all PHI entries coming from the loop are loop invariant.
+ // Because the code is in LCSSA form, any values used outside of the loop
+ // must pass through a PHI in the exit block, meaning that this check is
+ // sufficient to guarantee that no loop-variant values are used outside
+ // of the loop.
+ BasicBlock::iterator BI = exitBlock->begin();
+ while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ Value* incoming = P->getIncomingValueForBlock(exitingBlock);
+ if (Instruction* I = dyn_cast<Instruction>(incoming))
+ if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
+ return false;
+
+ ++BI;
+ }
+
+ // Make sure that no instructions in the block have potential side-effects.
+ // This includes instructions that could write to memory, and loads that are
+ // marked volatile. This could be made more aggressive by using aliasing
+ // information to identify readonly and readnone calls.
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI) {
+ for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end();
+ BI != BE; ++BI) {
+ if (BI->mayHaveSideEffects())
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// runOnLoop - Remove dead loops, by which we mean loops that do not impact the
+/// observable behavior of the program other than finite running time. Note
+/// we do ensure that this never remove a loop that might be infinite, as doing
+/// so could change the halting/non-halting nature of a program.
+/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
+/// in order to make various safety checks work.
+bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
+ // We can only remove the loop if there is a preheader that we can
+ // branch from after removing it.
+ BasicBlock* preheader = L->getLoopPreheader();
+ if (!preheader)
+ return false;
+
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->hasDedicatedExits())
+ return false;
+
+ // We can't remove loops that contain subloops. If the subloops were dead,
+ // they would already have been removed in earlier executions of this pass.
+ if (L->begin() != L->end())
+ return false;
+
+ SmallVector<BasicBlock*, 4> exitingBlocks;
+ L->getExitingBlocks(exitingBlocks);
+
+ SmallVector<BasicBlock*, 4> exitBlocks;
+ L->getUniqueExitBlocks(exitBlocks);
+
+ // We require that the loop only have a single exit block. Otherwise, we'd
+ // be in the situation of needing to be able to solve statically which exit
+ // block will be branched to, or trying to preserve the branching logic in
+ // a loop invariant manner.
+ if (exitBlocks.size() != 1)
+ return false;
+
+ // Loops with multiple exits are too complicated to handle correctly.
+ if (exitingBlocks.size() != 1)
+ return false;
+
+ // Finally, we have to check that the loop really is dead.
+ bool Changed = false;
+ if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
+ return Changed;
+
+ // Don't remove loops for which we can't solve the trip count.
+ // They could be infinite, in which case we'd be changing program behavior.
+ ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
+ const SCEV *S = SE.getMaxBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(S))
+ return Changed;
+
+ // Now that we know the removal is safe, remove the loop by changing the
+ // branch from the preheader to go to the single exit block.
+ BasicBlock* exitBlock = exitBlocks[0];
+ BasicBlock* exitingBlock = exitingBlocks[0];
+
+ // Because we're deleting a large chunk of code at once, the sequence in which
+ // we remove things is very important to avoid invalidation issues. Don't
+ // mess with this unless you have good reason and know what you're doing.
+
+ // Tell ScalarEvolution that the loop is deleted. Do this before
+ // deleting the loop so that ScalarEvolution can look at the loop
+ // to determine what it needs to clean up.
+ SE.forgetLoop(L);
+
+ // Connect the preheader directly to the exit block.
+ TerminatorInst* TI = preheader->getTerminator();
+ TI->replaceUsesOfWith(L->getHeader(), exitBlock);
+
+ // Rewrite phis in the exit block to get their inputs from
+ // the preheader instead of the exiting block.
+ BasicBlock::iterator BI = exitBlock->begin();
+ while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ P->replaceUsesOfWith(exitingBlock, preheader);
+ ++BI;
+ }
+
+ // Update the dominator tree and remove the instructions and blocks that will
+ // be deleted from the reference counting scheme.
+ DominatorTree& DT = getAnalysis<DominatorTree>();
+ DominanceFrontier* DF = getAnalysisIfAvailable<DominanceFrontier>();
+ SmallPtrSet<DomTreeNode*, 8> ChildNodes;
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI) {
+ // Move all of the block's children to be children of the preheader, which
+ // allows us to remove the domtree entry for the block.
+ ChildNodes.insert(DT[*LI]->begin(), DT[*LI]->end());
+ for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
+ DE = ChildNodes.end(); DI != DE; ++DI) {
+ DT.changeImmediateDominator(*DI, DT[preheader]);
+ if (DF) DF->changeImmediateDominator((*DI)->getBlock(), preheader, &DT);
+ }
+
+ ChildNodes.clear();
+ DT.eraseNode(*LI);
+ if (DF) DF->removeBlock(*LI);
+
+ // Remove the block from the reference counting scheme, so that we can
+ // delete it freely later.
+ (*LI)->dropAllReferences();
+ }
+
+ // Erase the instructions and the blocks without having to worry
+ // about ordering because we already dropped the references.
+ // NOTE: This iteration is safe because erasing the block does not remove its
+ // entry from the loop's block list. We do that in the next section.
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI)
+ (*LI)->eraseFromParent();
+
+ // Finally, the blocks from loopinfo. This has to happen late because
+ // otherwise our loop iterators won't work.
+ LoopInfo& loopInfo = getAnalysis<LoopInfo>();
+ SmallPtrSet<BasicBlock*, 8> blocks;
+ blocks.insert(L->block_begin(), L->block_end());
+ for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
+ E = blocks.end(); I != E; ++I)
+ loopInfo.removeBlock(*I);
+
+ // The last step is to inform the loop pass manager that we've
+ // eliminated this loop.
+ LPM.deleteLoopFromQueue(L);
+ Changed = true;
+
+ ++NumDeleted;
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp
new file mode 100644
index 0000000..a433674
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -0,0 +1,1270 @@
+//===- LoopIndexSplit.cpp - Loop Index Splitting Pass ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Loop Index Splitting Pass. This pass handles three
+// kinds of loops.
+//
+// [1] A loop may be eliminated if the body is executed exactly once.
+// For example,
+//
+// for (i = 0; i < N; ++i) {
+// if (i == X) {
+// body;
+// }
+// }
+//
+// is transformed to
+//
+// i = X;
+// body;
+//
+// [2] A loop's iteration space may be shrunk if the loop body is executed
+// for a proper sub-range of the loop's iteration space. For example,
+//
+// for (i = 0; i < N; ++i) {
+// if (i > A && i < B) {
+// ...
+// }
+// }
+//
+// is transformed to iterators from A to B, if A > 0 and B < N.
+//
+// [3] A loop may be split if the loop body is dominated by a branch.
+// For example,
+//
+// for (i = LB; i < UB; ++i) { if (i < SV) A; else B; }
+//
+// is transformed into
+//
+// AEV = BSV = SV
+// for (i = LB; i < min(UB, AEV); ++i)
+// A;
+// for (i = max(LB, BSV); i < UB; ++i);
+// B;
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-index-split"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(NumIndexSplit, "Number of loop index split");
+STATISTIC(NumIndexSplitRemoved, "Number of loops eliminated by loop index split");
+STATISTIC(NumRestrictBounds, "Number of loop iteration space restricted");
+
+namespace {
+
+ class LoopIndexSplit : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopIndexSplit() : LoopPass(ID) {}
+
+ // Index split Loop L. Return true if loop is split.
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<ScalarEvolution>();
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
+ }
+
+ private:
+ /// processOneIterationLoop -- Eliminate loop if loop body is executed
+ /// only once. For example,
+ /// for (i = 0; i < N; ++i) {
+ /// if ( i == X) {
+ /// ...
+ /// }
+ /// }
+ ///
+ bool processOneIterationLoop();
+
+ // -- Routines used by updateLoopIterationSpace();
+
+ /// updateLoopIterationSpace -- Update loop's iteration space if loop
+ /// body is executed for certain IV range only. For example,
+ ///
+ /// for (i = 0; i < N; ++i) {
+ /// if ( i > A && i < B) {
+ /// ...
+ /// }
+ /// }
+ /// is transformed to iterators from A to B, if A > 0 and B < N.
+ ///
+ bool updateLoopIterationSpace();
+
+ /// restrictLoopBound - Op dominates loop body. Op compares an IV based value
+ /// with a loop invariant value. Update loop's lower and upper bound based on
+ /// the loop invariant value.
+ bool restrictLoopBound(ICmpInst &Op);
+
+ // --- Routines used by splitLoop(). --- /
+
+ bool splitLoop();
+
+ /// removeBlocks - Remove basic block DeadBB and all blocks dominated by
+ /// DeadBB. This routine is used to remove split condition's dead branch,
+ /// dominated by DeadBB. LiveBB dominates split conidition's other branch.
+ void removeBlocks(BasicBlock *DeadBB, Loop *LP, BasicBlock *LiveBB);
+
+ /// moveExitCondition - Move exit condition EC into split condition block.
+ void moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
+ BasicBlock *ExitBB, ICmpInst *EC, ICmpInst *SC,
+ PHINode *IV, Instruction *IVAdd, Loop *LP,
+ unsigned);
+
+ /// updatePHINodes - CFG has been changed.
+ /// Before
+ /// - ExitBB's single predecessor was Latch
+ /// - Latch's second successor was Header
+ /// Now
+ /// - ExitBB's single predecessor was Header
+ /// - Latch's one and only successor was Header
+ ///
+ /// Update ExitBB PHINodes' to reflect this change.
+ void updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch,
+ BasicBlock *Header,
+ PHINode *IV, Instruction *IVIncrement, Loop *LP);
+
+ // --- Utility routines --- /
+
+ /// cleanBlock - A block is considered clean if all non terminal
+ /// instructions are either PHINodes or IV based values.
+ bool cleanBlock(BasicBlock *BB);
+
+ /// IVisLT - If Op is comparing IV based value with an loop invariant and
+ /// IV based value is less than the loop invariant then return the loop
+ /// invariant. Otherwise return NULL.
+ Value * IVisLT(ICmpInst &Op);
+
+ /// IVisLE - If Op is comparing IV based value with an loop invariant and
+ /// IV based value is less than or equal to the loop invariant then
+ /// return the loop invariant. Otherwise return NULL.
+ Value * IVisLE(ICmpInst &Op);
+
+ /// IVisGT - If Op is comparing IV based value with an loop invariant and
+ /// IV based value is greater than the loop invariant then return the loop
+ /// invariant. Otherwise return NULL.
+ Value * IVisGT(ICmpInst &Op);
+
+ /// IVisGE - If Op is comparing IV based value with an loop invariant and
+ /// IV based value is greater than or equal to the loop invariant then
+ /// return the loop invariant. Otherwise return NULL.
+ Value * IVisGE(ICmpInst &Op);
+
+ private:
+
+ // Current Loop information.
+ Loop *L;
+ LPPassManager *LPM;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ DominanceFrontier *DF;
+
+ PHINode *IndVar;
+ ICmpInst *ExitCondition;
+ ICmpInst *SplitCondition;
+ Value *IVStartValue;
+ Value *IVExitValue;
+ Instruction *IVIncrement;
+ SmallPtrSet<Value *, 4> IVBasedValues;
+ };
+}
+
+char LoopIndexSplit::ID = 0;
+INITIALIZE_PASS(LoopIndexSplit, "loop-index-split",
+ "Index Split Loops", false, false);
+
+Pass *llvm::createLoopIndexSplitPass() {
+ return new LoopIndexSplit();
+}
+
+// Index split Loop L. Return true if loop is split.
+bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) {
+ L = IncomingLoop;
+ LPM = &LPM_Ref;
+
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ // FIXME - Nested loops make dominator info updates tricky.
+ if (!L->getSubLoops().empty())
+ return false;
+
+ DT = &getAnalysis<DominatorTree>();
+ LI = &getAnalysis<LoopInfo>();
+ DF = &getAnalysis<DominanceFrontier>();
+
+ // Initialize loop data.
+ IndVar = L->getCanonicalInductionVariable();
+ if (!IndVar) return false;
+
+ bool P1InLoop = L->contains(IndVar->getIncomingBlock(1));
+ IVStartValue = IndVar->getIncomingValue(!P1InLoop);
+ IVIncrement = dyn_cast<Instruction>(IndVar->getIncomingValue(P1InLoop));
+ if (!IVIncrement) return false;
+
+ IVBasedValues.clear();
+ IVBasedValues.insert(IndVar);
+ IVBasedValues.insert(IVIncrement);
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ for(BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end();
+ BI != BE; ++BI) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BI))
+ if (BO != IVIncrement
+ && (BO->getOpcode() == Instruction::Add
+ || BO->getOpcode() == Instruction::Sub))
+ if (IVBasedValues.count(BO->getOperand(0))
+ && L->isLoopInvariant(BO->getOperand(1)))
+ IVBasedValues.insert(BO);
+ }
+
+ // Reject loop if loop exit condition is not suitable.
+ BasicBlock *ExitingBlock = L->getExitingBlock();
+ if (!ExitingBlock)
+ return false;
+ BranchInst *EBR = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!EBR) return false;
+ ExitCondition = dyn_cast<ICmpInst>(EBR->getCondition());
+ if (!ExitCondition) return false;
+ if (ExitingBlock != L->getLoopLatch()) return false;
+ IVExitValue = ExitCondition->getOperand(1);
+ if (!L->isLoopInvariant(IVExitValue))
+ IVExitValue = ExitCondition->getOperand(0);
+ if (!L->isLoopInvariant(IVExitValue))
+ return false;
+ if (!IVBasedValues.count(
+ ExitCondition->getOperand(IVExitValue == ExitCondition->getOperand(0))))
+ return false;
+
+ // If start value is more then exit value where induction variable
+ // increments by 1 then we are potentially dealing with an infinite loop.
+ // Do not index split this loop.
+ if (ConstantInt *SV = dyn_cast<ConstantInt>(IVStartValue))
+ if (ConstantInt *EV = dyn_cast<ConstantInt>(IVExitValue))
+ if (SV->getSExtValue() > EV->getSExtValue())
+ return false;
+
+ if (processOneIterationLoop())
+ return true;
+
+ if (updateLoopIterationSpace())
+ return true;
+
+ if (splitLoop())
+ return true;
+
+ return false;
+}
+
+// --- Helper routines ---
+// isUsedOutsideLoop - Returns true iff V is used outside the loop L.
+static bool isUsedOutsideLoop(Value *V, Loop *L) {
+ for(Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+ if (!L->contains(cast<Instruction>(*UI)))
+ return true;
+ return false;
+}
+
+// Return V+1
+static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt,
+ LLVMContext &Context) {
+ Constant *One = ConstantInt::get(V->getType(), 1, Sign);
+ return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
+}
+
+// Return V-1
+static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt,
+ LLVMContext &Context) {
+ Constant *One = ConstantInt::get(V->getType(), 1, Sign);
+ return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
+}
+
+// Return min(V1, V1)
+static Value *getMin(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
+
+ Value *C = new ICmpInst(InsertPt,
+ Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ V1, V2, "lsp");
+ return SelectInst::Create(C, V1, V2, "lsp", InsertPt);
+}
+
+// Return max(V1, V2)
+static Value *getMax(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
+
+ Value *C = new ICmpInst(InsertPt,
+ Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ V1, V2, "lsp");
+ return SelectInst::Create(C, V2, V1, "lsp", InsertPt);
+}
+
+/// processOneIterationLoop -- Eliminate loop if loop body is executed
+/// only once. For example,
+/// for (i = 0; i < N; ++i) {
+/// if ( i == X) {
+/// ...
+/// }
+/// }
+///
+bool LoopIndexSplit::processOneIterationLoop() {
+ SplitCondition = NULL;
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *Header = L->getHeader();
+ BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
+ if (!BR) return false;
+ if (!isa<BranchInst>(Latch->getTerminator())) return false;
+ if (BR->isUnconditional()) return false;
+ SplitCondition = dyn_cast<ICmpInst>(BR->getCondition());
+ if (!SplitCondition) return false;
+ if (SplitCondition == ExitCondition) return false;
+ if (SplitCondition->getPredicate() != ICmpInst::ICMP_EQ) return false;
+ if (BR->getOperand(1) != Latch) return false;
+ if (!IVBasedValues.count(SplitCondition->getOperand(0))
+ && !IVBasedValues.count(SplitCondition->getOperand(1)))
+ return false;
+
+ // If IV is used outside the loop then this loop traversal is required.
+ // FIXME: Calculate and use last IV value.
+ if (isUsedOutsideLoop(IVIncrement, L))
+ return false;
+
+ // If BR operands are not IV or not loop invariants then skip this loop.
+ Value *OPV = SplitCondition->getOperand(0);
+ Value *SplitValue = SplitCondition->getOperand(1);
+ if (!L->isLoopInvariant(SplitValue))
+ std::swap(OPV, SplitValue);
+ if (!L->isLoopInvariant(SplitValue))
+ return false;
+ Instruction *OPI = dyn_cast<Instruction>(OPV);
+ if (!OPI)
+ return false;
+ if (OPI->getParent() != Header || isUsedOutsideLoop(OPI, L))
+ return false;
+ Value *StartValue = IVStartValue;
+ Value *ExitValue = IVExitValue;;
+
+ if (OPV != IndVar) {
+ // If BR operand is IV based then use this operand to calculate
+ // effective conditions for loop body.
+ BinaryOperator *BOPV = dyn_cast<BinaryOperator>(OPV);
+ if (!BOPV)
+ return false;
+ if (BOPV->getOpcode() != Instruction::Add)
+ return false;
+ StartValue = BinaryOperator::CreateAdd(OPV, StartValue, "" , BR);
+ ExitValue = BinaryOperator::CreateAdd(OPV, ExitValue, "" , BR);
+ }
+
+ if (!cleanBlock(Header))
+ return false;
+
+ if (!cleanBlock(Latch))
+ return false;
+
+ // If the merge point for BR is not loop latch then skip this loop.
+ if (BR->getSuccessor(0) != Latch) {
+ DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
+ assert (DF0 != DF->end() && "Unable to find dominance frontier");
+ if (!DF0->second.count(Latch))
+ return false;
+ }
+
+ if (BR->getSuccessor(1) != Latch) {
+ DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
+ assert (DF1 != DF->end() && "Unable to find dominance frontier");
+ if (!DF1->second.count(Latch))
+ return false;
+ }
+
+ // Now, Current loop L contains compare instruction
+ // that compares induction variable, IndVar, against loop invariant. And
+ // entire (i.e. meaningful) loop body is dominated by this compare
+ // instruction. In such case eliminate
+ // loop structure surrounding this loop body. For example,
+ // for (int i = start; i < end; ++i) {
+ // if ( i == somevalue) {
+ // loop_body
+ // }
+ // }
+ // can be transformed into
+ // if (somevalue >= start && somevalue < end) {
+ // i = somevalue;
+ // loop_body
+ // }
+
+ // Replace index variable with split value in loop body. Loop body is executed
+ // only when index variable is equal to split value.
+ IndVar->replaceAllUsesWith(SplitValue);
+
+ // Replace split condition in header.
+ // Transform
+ // SplitCondition : icmp eq i32 IndVar, SplitValue
+ // into
+ // c1 = icmp uge i32 SplitValue, StartValue
+ // c2 = icmp ult i32 SplitValue, ExitValue
+ // and i32 c1, c2
+ Instruction *C1 = new ICmpInst(BR, ExitCondition->isSigned() ?
+ ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
+ SplitValue, StartValue, "lisplit");
+
+ CmpInst::Predicate C2P = ExitCondition->getPredicate();
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+ if (LatchBR->getOperand(1) != Header)
+ C2P = CmpInst::getInversePredicate(C2P);
+ Instruction *C2 = new ICmpInst(BR, C2P, SplitValue, ExitValue, "lisplit");
+ Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR);
+
+ SplitCondition->replaceAllUsesWith(NSplitCond);
+ SplitCondition->eraseFromParent();
+
+ // Remove Latch to Header edge.
+ BasicBlock *LatchSucc = NULL;
+ Header->removePredecessor(Latch);
+ for (succ_iterator SI = succ_begin(Latch), E = succ_end(Latch);
+ SI != E; ++SI) {
+ if (Header != *SI)
+ LatchSucc = *SI;
+ }
+
+ // Clean up latch block.
+ Value *LatchBRCond = LatchBR->getCondition();
+ LatchBR->setUnconditionalDest(LatchSucc);
+ RecursivelyDeleteTriviallyDeadInstructions(LatchBRCond);
+
+ LPM->deleteLoopFromQueue(L);
+
+ // Update Dominator Info.
+ // Only CFG change done is to remove Latch to Header edge. This
+ // does not change dominator tree because Latch did not dominate
+ // Header.
+ if (DF) {
+ DominanceFrontier::iterator HeaderDF = DF->find(Header);
+ if (HeaderDF != DF->end())
+ DF->removeFromFrontier(HeaderDF, Header);
+
+ DominanceFrontier::iterator LatchDF = DF->find(Latch);
+ if (LatchDF != DF->end())
+ DF->removeFromFrontier(LatchDF, Header);
+ }
+
+ ++NumIndexSplitRemoved;
+ return true;
+}
+
+/// restrictLoopBound - Op dominates loop body. Op compares an IV based value
+/// with a loop invariant value. Update loop's lower and upper bound based on
+/// the loop invariant value.
+bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) {
+ bool Sign = Op.isSigned();
+ Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
+
+ if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
+ BranchInst *EBR =
+ cast<BranchInst>(ExitCondition->getParent()->getTerminator());
+ ExitCondition->setPredicate(ExitCondition->getInversePredicate());
+ BasicBlock *T = EBR->getSuccessor(0);
+ EBR->setSuccessor(0, EBR->getSuccessor(1));
+ EBR->setSuccessor(1, T);
+ }
+
+ LLVMContext &Context = Op.getContext();
+
+ // New upper and lower bounds.
+ Value *NLB = NULL;
+ Value *NUB = NULL;
+ if (Value *V = IVisLT(Op)) {
+ // Restrict upper bound.
+ if (IVisLE(*ExitCondition))
+ V = getMinusOne(V, Sign, PHTerm, Context);
+ NUB = getMin(V, IVExitValue, Sign, PHTerm);
+ } else if (Value *V = IVisLE(Op)) {
+ // Restrict upper bound.
+ if (IVisLT(*ExitCondition))
+ V = getPlusOne(V, Sign, PHTerm, Context);
+ NUB = getMin(V, IVExitValue, Sign, PHTerm);
+ } else if (Value *V = IVisGT(Op)) {
+ // Restrict lower bound.
+ V = getPlusOne(V, Sign, PHTerm, Context);
+ NLB = getMax(V, IVStartValue, Sign, PHTerm);
+ } else if (Value *V = IVisGE(Op))
+ // Restrict lower bound.
+ NLB = getMax(V, IVStartValue, Sign, PHTerm);
+
+ if (!NLB && !NUB)
+ return false;
+
+ if (NLB) {
+ unsigned i = IndVar->getBasicBlockIndex(L->getLoopPreheader());
+ IndVar->setIncomingValue(i, NLB);
+ }
+
+ if (NUB) {
+ unsigned i = (ExitCondition->getOperand(0) != IVExitValue);
+ ExitCondition->setOperand(i, NUB);
+ }
+ return true;
+}
+
+/// updateLoopIterationSpace -- Update loop's iteration space if loop
+/// body is executed for certain IV range only. For example,
+///
+/// for (i = 0; i < N; ++i) {
+/// if ( i > A && i < B) {
+/// ...
+/// }
+/// }
+/// is transformed to iterators from A to B, if A > 0 and B < N.
+///
+bool LoopIndexSplit::updateLoopIterationSpace() {
+ SplitCondition = NULL;
+ if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
+ || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
+ return false;
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *Header = L->getHeader();
+ BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
+ if (!BR) return false;
+ if (!isa<BranchInst>(Latch->getTerminator())) return false;
+ if (BR->isUnconditional()) return false;
+ BinaryOperator *AND = dyn_cast<BinaryOperator>(BR->getCondition());
+ if (!AND) return false;
+ if (AND->getOpcode() != Instruction::And) return false;
+ ICmpInst *Op0 = dyn_cast<ICmpInst>(AND->getOperand(0));
+ ICmpInst *Op1 = dyn_cast<ICmpInst>(AND->getOperand(1));
+ if (!Op0 || !Op1)
+ return false;
+ IVBasedValues.insert(AND);
+ IVBasedValues.insert(Op0);
+ IVBasedValues.insert(Op1);
+ if (!cleanBlock(Header)) return false;
+ BasicBlock *ExitingBlock = ExitCondition->getParent();
+ if (!cleanBlock(ExitingBlock)) return false;
+
+ // If the merge point for BR is not loop latch then skip this loop.
+ if (BR->getSuccessor(0) != Latch) {
+ DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
+ assert (DF0 != DF->end() && "Unable to find dominance frontier");
+ if (!DF0->second.count(Latch))
+ return false;
+ }
+
+ if (BR->getSuccessor(1) != Latch) {
+ DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
+ assert (DF1 != DF->end() && "Unable to find dominance frontier");
+ if (!DF1->second.count(Latch))
+ return false;
+ }
+
+ // Verify that loop exiting block has only two predecessor, where one pred
+ // is split condition block. The other predecessor will become exiting block's
+ // dominator after CFG is updated. TODO : Handle CFG's where exiting block has
+ // more then two predecessors. This requires extra work in updating dominator
+ // information.
+ BasicBlock *ExitingBBPred = NULL;
+ for (pred_iterator PI = pred_begin(ExitingBlock), PE = pred_end(ExitingBlock);
+ PI != PE; ++PI) {
+ BasicBlock *BB = *PI;
+ if (Header == BB)
+ continue;
+ if (ExitingBBPred)
+ return false;
+ else
+ ExitingBBPred = BB;
+ }
+
+ if (!restrictLoopBound(*Op0))
+ return false;
+
+ if (!restrictLoopBound(*Op1))
+ return false;
+
+ // Update CFG.
+ if (BR->getSuccessor(0) == ExitingBlock)
+ BR->setUnconditionalDest(BR->getSuccessor(1));
+ else
+ BR->setUnconditionalDest(BR->getSuccessor(0));
+
+ AND->eraseFromParent();
+ if (Op0->use_empty())
+ Op0->eraseFromParent();
+ if (Op1->use_empty())
+ Op1->eraseFromParent();
+
+ // Update domiantor info. Now, ExitingBlock has only one predecessor,
+ // ExitingBBPred, and it is ExitingBlock's immediate domiantor.
+ DT->changeImmediateDominator(ExitingBlock, ExitingBBPred);
+
+ BasicBlock *ExitBlock = ExitingBlock->getTerminator()->getSuccessor(1);
+ if (L->contains(ExitBlock))
+ ExitBlock = ExitingBlock->getTerminator()->getSuccessor(0);
+
+ // If ExitingBlock is a member of the loop basic blocks' DF list then
+ // replace ExitingBlock with header and exit block in the DF list
+ DominanceFrontier::iterator ExitingBlockDF = DF->find(ExitingBlock);
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ if (BB == Header || BB == ExitingBlock)
+ continue;
+ DominanceFrontier::iterator BBDF = DF->find(BB);
+ DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
+ DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
+ while (DomSetI != DomSetE) {
+ DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
+ ++DomSetI;
+ BasicBlock *DFBB = *CurrentItr;
+ if (DFBB == ExitingBlock) {
+ BBDF->second.erase(DFBB);
+ for (DominanceFrontier::DomSetType::iterator
+ EBI = ExitingBlockDF->second.begin(),
+ EBE = ExitingBlockDF->second.end(); EBI != EBE; ++EBI)
+ BBDF->second.insert(*EBI);
+ }
+ }
+ }
+ ++NumRestrictBounds;
+ return true;
+}
+
+/// removeBlocks - Remove basic block DeadBB and all blocks dominated by DeadBB.
+/// This routine is used to remove split condition's dead branch, dominated by
+/// DeadBB. LiveBB dominates split conidition's other branch.
+void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,
+ BasicBlock *LiveBB) {
+
+ // First update DeadBB's dominance frontier.
+ SmallVector<BasicBlock *, 8> FrontierBBs;
+ DominanceFrontier::iterator DeadBBDF = DF->find(DeadBB);
+ if (DeadBBDF != DF->end()) {
+ SmallVector<BasicBlock *, 8> PredBlocks;
+
+ DominanceFrontier::DomSetType DeadBBSet = DeadBBDF->second;
+ for (DominanceFrontier::DomSetType::iterator DeadBBSetI = DeadBBSet.begin(),
+ DeadBBSetE = DeadBBSet.end(); DeadBBSetI != DeadBBSetE; ++DeadBBSetI)
+ {
+ BasicBlock *FrontierBB = *DeadBBSetI;
+ FrontierBBs.push_back(FrontierBB);
+
+ // Rremove any PHI incoming edge from blocks dominated by DeadBB.
+ PredBlocks.clear();
+ for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (DT->dominates(DeadBB, P))
+ PredBlocks.push_back(P);
+ }
+
+ for(BasicBlock::iterator FBI = FrontierBB->begin(), FBE = FrontierBB->end();
+ FBI != FBE; ++FBI) {
+ if (PHINode *PN = dyn_cast<PHINode>(FBI)) {
+ for(SmallVector<BasicBlock *, 8>::iterator PI = PredBlocks.begin(),
+ PE = PredBlocks.end(); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ PN->removeIncomingValue(P);
+ }
+ }
+ else
+ break;
+ }
+ }
+ }
+
+ // Now remove DeadBB and all nodes dominated by DeadBB in df order.
+ SmallVector<BasicBlock *, 32> WorkList;
+ DomTreeNode *DN = DT->getNode(DeadBB);
+ for (df_iterator<DomTreeNode*> DI = df_begin(DN),
+ E = df_end(DN); DI != E; ++DI) {
+ BasicBlock *BB = DI->getBlock();
+ WorkList.push_back(BB);
+ BB->replaceAllUsesWith(UndefValue::get(
+ Type::getLabelTy(DeadBB->getContext())));
+ }
+
+ while (!WorkList.empty()) {
+ BasicBlock *BB = WorkList.pop_back_val();
+ LPM->deleteSimpleAnalysisValue(BB, LP);
+ for(BasicBlock::iterator BBI = BB->begin(), BBE = BB->end();
+ BBI != BBE; ) {
+ Instruction *I = BBI;
+ ++BBI;
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ LPM->deleteSimpleAnalysisValue(I, LP);
+ I->eraseFromParent();
+ }
+ DT->eraseNode(BB);
+ DF->removeBlock(BB);
+ LI->removeBlock(BB);
+ BB->eraseFromParent();
+ }
+
+ // Update Frontier BBs' dominator info.
+ while (!FrontierBBs.empty()) {
+ BasicBlock *FBB = FrontierBBs.pop_back_val();
+ BasicBlock *NewDominator = FBB->getSinglePredecessor();
+ if (!NewDominator) {
+ pred_iterator PI = pred_begin(FBB), PE = pred_end(FBB);
+ NewDominator = *PI;
+ ++PI;
+ if (NewDominator != LiveBB) {
+ for(; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (P == LiveBB) {
+ NewDominator = LiveBB;
+ break;
+ }
+ NewDominator = DT->findNearestCommonDominator(NewDominator, P);
+ }
+ }
+ }
+ assert (NewDominator && "Unable to fix dominator info.");
+ DT->changeImmediateDominator(FBB, NewDominator);
+ DF->changeImmediateDominator(FBB, NewDominator, DT);
+ }
+
+}
+
+// moveExitCondition - Move exit condition EC into split condition block CondBB.
+void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
+ BasicBlock *ExitBB, ICmpInst *EC,
+ ICmpInst *SC, PHINode *IV,
+ Instruction *IVAdd, Loop *LP,
+ unsigned ExitValueNum) {
+
+ BasicBlock *ExitingBB = EC->getParent();
+ Instruction *CurrentBR = CondBB->getTerminator();
+
+ // Move exit condition into split condition block.
+ EC->moveBefore(CurrentBR);
+ EC->setOperand(ExitValueNum == 0 ? 1 : 0, IV);
+
+ // Move exiting block's branch into split condition block. Update its branch
+ // destination.
+ BranchInst *ExitingBR = cast<BranchInst>(ExitingBB->getTerminator());
+ ExitingBR->moveBefore(CurrentBR);
+ BasicBlock *OrigDestBB = NULL;
+ if (ExitingBR->getSuccessor(0) == ExitBB) {
+ OrigDestBB = ExitingBR->getSuccessor(1);
+ ExitingBR->setSuccessor(1, ActiveBB);
+ }
+ else {
+ OrigDestBB = ExitingBR->getSuccessor(0);
+ ExitingBR->setSuccessor(0, ActiveBB);
+ }
+
+ // Remove split condition and current split condition branch.
+ SC->eraseFromParent();
+ CurrentBR->eraseFromParent();
+
+ // Connect exiting block to original destination.
+ BranchInst::Create(OrigDestBB, ExitingBB);
+
+ // Update PHINodes
+ updatePHINodes(ExitBB, ExitingBB, CondBB, IV, IVAdd, LP);
+
+ // Fix dominator info.
+ // ExitBB is now dominated by CondBB
+ DT->changeImmediateDominator(ExitBB, CondBB);
+ DF->changeImmediateDominator(ExitBB, CondBB, DT);
+
+ // Blocks outside the loop may have been in the dominance frontier of blocks
+ // inside the condition; this is now impossible because the blocks inside the
+ // condition no loger dominate the exit. Remove the relevant blocks from
+ // the dominance frontiers.
+ for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end();
+ I != E; ++I) {
+ if (!DT->properlyDominates(CondBB, *I)) continue;
+ DominanceFrontier::iterator BBDF = DF->find(*I);
+ DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
+ DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
+ while (DomSetI != DomSetE) {
+ DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
+ ++DomSetI;
+ BasicBlock *DFBB = *CurrentItr;
+ if (!LP->contains(DFBB))
+ BBDF->second.erase(DFBB);
+ }
+ }
+}
+
+/// updatePHINodes - CFG has been changed.
+/// Before
+/// - ExitBB's single predecessor was Latch
+/// - Latch's second successor was Header
+/// Now
+/// - ExitBB's single predecessor is Header
+/// - Latch's one and only successor is Header
+///
+/// Update ExitBB PHINodes' to reflect this change.
+void LoopIndexSplit::updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch,
+ BasicBlock *Header,
+ PHINode *IV, Instruction *IVIncrement,
+ Loop *LP) {
+
+ for (BasicBlock::iterator BI = ExitBB->begin(), BE = ExitBB->end();
+ BI != BE; ) {
+ PHINode *PN = dyn_cast<PHINode>(BI);
+ ++BI;
+ if (!PN)
+ break;
+
+ Value *V = PN->getIncomingValueForBlock(Latch);
+ if (PHINode *PHV = dyn_cast<PHINode>(V)) {
+ // PHV is in Latch. PHV has one use is in ExitBB PHINode. And one use
+ // in Header which is new incoming value for PN.
+ Value *NewV = NULL;
+ for (Value::use_iterator UI = PHV->use_begin(), E = PHV->use_end();
+ UI != E; ++UI)
+ if (PHINode *U = dyn_cast<PHINode>(*UI))
+ if (LP->contains(U)) {
+ NewV = U;
+ break;
+ }
+
+ // Add incoming value from header only if PN has any use inside the loop.
+ if (NewV)
+ PN->addIncoming(NewV, Header);
+
+ } else if (Instruction *PHI = dyn_cast<Instruction>(V)) {
+ // If this instruction is IVIncrement then IV is new incoming value
+ // from header otherwise this instruction must be incoming value from
+ // header because loop is in LCSSA form.
+ if (PHI == IVIncrement)
+ PN->addIncoming(IV, Header);
+ else
+ PN->addIncoming(V, Header);
+ } else
+ // Otherwise this is an incoming value from header because loop is in
+ // LCSSA form.
+ PN->addIncoming(V, Header);
+
+ // Remove incoming value from Latch.
+ PN->removeIncomingValue(Latch);
+ }
+}
+
+bool LoopIndexSplit::splitLoop() {
+ SplitCondition = NULL;
+ if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
+ || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
+ return false;
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ BranchInst *SBR = NULL; // Split Condition Branch
+ BranchInst *EBR = cast<BranchInst>(ExitCondition->getParent()->getTerminator());
+ // If Exiting block includes loop variant instructions then this
+ // loop may not be split safely.
+ BasicBlock *ExitingBlock = ExitCondition->getParent();
+ if (!cleanBlock(ExitingBlock)) return false;
+
+ LLVMContext &Context = Header->getContext();
+
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator());
+ if (!BR || BR->isUnconditional()) continue;
+ ICmpInst *CI = dyn_cast<ICmpInst>(BR->getCondition());
+ if (!CI || CI == ExitCondition
+ || CI->getPredicate() == ICmpInst::ICMP_NE
+ || CI->getPredicate() == ICmpInst::ICMP_EQ)
+ continue;
+
+ // Unable to handle triangle loops at the moment.
+ // In triangle loop, split condition is in header and one of the
+ // the split destination is loop latch. If split condition is EQ
+ // then such loops are already handle in processOneIterationLoop().
+ if (Header == (*I)
+ && (Latch == BR->getSuccessor(0) || Latch == BR->getSuccessor(1)))
+ continue;
+
+ // If the block does not dominate the latch then this is not a diamond.
+ // Such loop may not benefit from index split.
+ if (!DT->dominates((*I), Latch))
+ continue;
+
+ // If split condition branches heads do not have single predecessor,
+ // SplitCondBlock, then is not possible to remove inactive branch.
+ if (!BR->getSuccessor(0)->getSinglePredecessor()
+ || !BR->getSuccessor(1)->getSinglePredecessor())
+ return false;
+
+ // If the merge point for BR is not loop latch then skip this condition.
+ if (BR->getSuccessor(0) != Latch) {
+ DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
+ assert (DF0 != DF->end() && "Unable to find dominance frontier");
+ if (!DF0->second.count(Latch))
+ continue;
+ }
+
+ if (BR->getSuccessor(1) != Latch) {
+ DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
+ assert (DF1 != DF->end() && "Unable to find dominance frontier");
+ if (!DF1->second.count(Latch))
+ continue;
+ }
+ SplitCondition = CI;
+ SBR = BR;
+ break;
+ }
+
+ if (!SplitCondition)
+ return false;
+
+ // If the predicate sign does not match then skip.
+ if (ExitCondition->isSigned() != SplitCondition->isSigned())
+ return false;
+
+ unsigned EVOpNum = (ExitCondition->getOperand(1) == IVExitValue);
+ unsigned SVOpNum = IVBasedValues.count(SplitCondition->getOperand(0));
+ Value *SplitValue = SplitCondition->getOperand(SVOpNum);
+ if (!L->isLoopInvariant(SplitValue))
+ return false;
+ if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum)))
+ return false;
+
+ // Check for side effects.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+
+ assert(DT->dominates(Header, BB));
+ if (DT->properlyDominates(SplitCondition->getParent(), BB))
+ continue;
+
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ++BI) {
+ Instruction *Inst = BI;
+
+ if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst)
+ && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
+ return false;
+ }
+ }
+
+ // Normalize loop conditions so that it is easier to calculate new loop
+ // bounds.
+ if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
+ ExitCondition->setPredicate(ExitCondition->getInversePredicate());
+ BasicBlock *T = EBR->getSuccessor(0);
+ EBR->setSuccessor(0, EBR->getSuccessor(1));
+ EBR->setSuccessor(1, T);
+ }
+
+ if (IVisGT(*SplitCondition) || IVisGE(*SplitCondition)) {
+ SplitCondition->setPredicate(SplitCondition->getInversePredicate());
+ BasicBlock *T = SBR->getSuccessor(0);
+ SBR->setSuccessor(0, SBR->getSuccessor(1));
+ SBR->setSuccessor(1, T);
+ }
+
+ //[*] Calculate new loop bounds.
+ Value *AEV = SplitValue;
+ Value *BSV = SplitValue;
+ bool Sign = SplitCondition->isSigned();
+ Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
+
+ if (IVisLT(*ExitCondition)) {
+ if (IVisLT(*SplitCondition)) {
+ /* Do nothing */
+ }
+ else if (IVisLE(*SplitCondition)) {
+ AEV = getPlusOne(SplitValue, Sign, PHTerm, Context);
+ BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
+ } else {
+ assert (0 && "Unexpected split condition!");
+ }
+ }
+ else if (IVisLE(*ExitCondition)) {
+ if (IVisLT(*SplitCondition)) {
+ AEV = getMinusOne(SplitValue, Sign, PHTerm, Context);
+ }
+ else if (IVisLE(*SplitCondition)) {
+ BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
+ } else {
+ assert (0 && "Unexpected split condition!");
+ }
+ } else {
+ assert (0 && "Unexpected exit condition!");
+ }
+ AEV = getMin(AEV, IVExitValue, Sign, PHTerm);
+ BSV = getMax(BSV, IVStartValue, Sign, PHTerm);
+
+ // [*] Clone Loop
+ ValueMap<const Value *, Value *> VMap;
+ Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this);
+ Loop *ALoop = L;
+
+ // [*] ALoop's exiting edge enters BLoop's header.
+ // ALoop's original exit block becomes BLoop's exit block.
+ PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]);
+ BasicBlock *A_ExitingBlock = ExitCondition->getParent();
+ BranchInst *A_ExitInsn =
+ dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
+ assert (A_ExitInsn && "Unable to find suitable loop exit branch");
+ BasicBlock *B_ExitBlock = A_ExitInsn->getSuccessor(1);
+ BasicBlock *B_Header = BLoop->getHeader();
+ if (ALoop->contains(B_ExitBlock)) {
+ B_ExitBlock = A_ExitInsn->getSuccessor(0);
+ A_ExitInsn->setSuccessor(0, B_Header);
+ } else
+ A_ExitInsn->setSuccessor(1, B_Header);
+
+ // [*] Update ALoop's exit value using new exit value.
+ ExitCondition->setOperand(EVOpNum, AEV);
+
+ // [*] Update BLoop's header phi nodes. Remove incoming PHINode's from
+ // original loop's preheader. Add incoming PHINode values from
+ // ALoop's exiting block. Update BLoop header's domiantor info.
+
+ // Collect inverse map of Header PHINodes.
+ DenseMap<Value *, Value *> InverseMap;
+ for (BasicBlock::iterator BI = ALoop->getHeader()->begin(),
+ BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+ PHINode *PNClone = cast<PHINode>(VMap[PN]);
+ InverseMap[PNClone] = PN;
+ } else
+ break;
+ }
+
+ BasicBlock *A_Preheader = ALoop->getLoopPreheader();
+ for (BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
+ BI != BE; ++BI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+ // Remove incoming value from original preheader.
+ PN->removeIncomingValue(A_Preheader);
+
+ // Add incoming value from A_ExitingBlock.
+ if (PN == B_IndVar)
+ PN->addIncoming(BSV, A_ExitingBlock);
+ else {
+ PHINode *OrigPN = cast<PHINode>(InverseMap[PN]);
+ Value *V2 = NULL;
+ // If loop header is also loop exiting block then
+ // OrigPN is incoming value for B loop header.
+ if (A_ExitingBlock == ALoop->getHeader())
+ V2 = OrigPN;
+ else
+ V2 = OrigPN->getIncomingValueForBlock(A_ExitingBlock);
+ PN->addIncoming(V2, A_ExitingBlock);
+ }
+ } else
+ break;
+ }
+
+ DT->changeImmediateDominator(B_Header, A_ExitingBlock);
+ DF->changeImmediateDominator(B_Header, A_ExitingBlock, DT);
+
+ // [*] Update BLoop's exit block. Its new predecessor is BLoop's exit
+ // block. Remove incoming PHINode values from ALoop's exiting block.
+ // Add new incoming values from BLoop's incoming exiting value.
+ // Update BLoop exit block's dominator info..
+ BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]);
+ for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
+ BI != BE; ++BI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+ PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
+ B_ExitingBlock);
+ PN->removeIncomingValue(A_ExitingBlock);
+ } else
+ break;
+ }
+
+ DT->changeImmediateDominator(B_ExitBlock, B_ExitingBlock);
+ DF->changeImmediateDominator(B_ExitBlock, B_ExitingBlock, DT);
+
+ //[*] Split ALoop's exit edge. This creates a new block which
+ // serves two purposes. First one is to hold PHINode defnitions
+ // to ensure that ALoop's LCSSA form. Second use it to act
+ // as a preheader for BLoop.
+ BasicBlock *A_ExitBlock = SplitEdge(A_ExitingBlock, B_Header, this);
+
+ //[*] Preserve ALoop's LCSSA form. Create new forwarding PHINodes
+ // in A_ExitBlock to redefine outgoing PHI definitions from ALoop.
+ for(BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
+ BI != BE; ++BI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+ Value *V1 = PN->getIncomingValueForBlock(A_ExitBlock);
+ PHINode *newPHI = PHINode::Create(PN->getType(), PN->getName());
+ newPHI->addIncoming(V1, A_ExitingBlock);
+ A_ExitBlock->getInstList().push_front(newPHI);
+ PN->removeIncomingValue(A_ExitBlock);
+ PN->addIncoming(newPHI, A_ExitBlock);
+ } else
+ break;
+ }
+
+ //[*] Eliminate split condition's inactive branch from ALoop.
+ BasicBlock *A_SplitCondBlock = SplitCondition->getParent();
+ BranchInst *A_BR = cast<BranchInst>(A_SplitCondBlock->getTerminator());
+ BasicBlock *A_InactiveBranch = NULL;
+ BasicBlock *A_ActiveBranch = NULL;
+ A_ActiveBranch = A_BR->getSuccessor(0);
+ A_InactiveBranch = A_BR->getSuccessor(1);
+ A_BR->setUnconditionalDest(A_ActiveBranch);
+ removeBlocks(A_InactiveBranch, L, A_ActiveBranch);
+
+ //[*] Eliminate split condition's inactive branch in from BLoop.
+ BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]);
+ BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
+ BasicBlock *B_InactiveBranch = NULL;
+ BasicBlock *B_ActiveBranch = NULL;
+ B_ActiveBranch = B_BR->getSuccessor(1);
+ B_InactiveBranch = B_BR->getSuccessor(0);
+ B_BR->setUnconditionalDest(B_ActiveBranch);
+ removeBlocks(B_InactiveBranch, BLoop, B_ActiveBranch);
+
+ BasicBlock *A_Header = ALoop->getHeader();
+ if (A_ExitingBlock == A_Header)
+ return true;
+
+ //[*] Move exit condition into split condition block to avoid
+ // executing dead loop iteration.
+ ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]);
+ Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]);
+ ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]);
+
+ moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
+ cast<ICmpInst>(SplitCondition), IndVar, IVIncrement,
+ ALoop, EVOpNum);
+
+ moveExitCondition(B_SplitCondBlock, B_ActiveBranch,
+ B_ExitBlock, B_ExitCondition,
+ B_SplitCondition, B_IndVar, B_IndVarIncrement,
+ BLoop, EVOpNum);
+
+ ++NumIndexSplit;
+ return true;
+}
+
+/// cleanBlock - A block is considered clean if all non terminal instructions
+/// are either, PHINodes, IV based.
+bool LoopIndexSplit::cleanBlock(BasicBlock *BB) {
+ Instruction *Terminator = BB->getTerminator();
+ for(BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ++BI) {
+ Instruction *I = BI;
+
+ if (isa<PHINode>(I) || I == Terminator || I == ExitCondition
+ || I == SplitCondition || IVBasedValues.count(I)
+ || isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ if (I->mayHaveSideEffects())
+ return false;
+
+ // I is used only inside this block then it is OK.
+ bool usedOutsideBB = false;
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ Instruction *U = cast<Instruction>(*UI);
+ if (U->getParent() != BB)
+ usedOutsideBB = true;
+ }
+ if (!usedOutsideBB)
+ continue;
+
+ // Otherwise we have a instruction that may not allow loop spliting.
+ return false;
+ }
+ return true;
+}
+
+/// IVisLT - If Op is comparing IV based value with an loop invariant and
+/// IV based value is less than the loop invariant then return the loop
+/// invariant. Otherwise return NULL.
+Value * LoopIndexSplit::IVisLT(ICmpInst &Op) {
+ ICmpInst::Predicate P = Op.getPredicate();
+ if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT)
+ && IVBasedValues.count(Op.getOperand(0))
+ && L->isLoopInvariant(Op.getOperand(1)))
+ return Op.getOperand(1);
+
+ if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT)
+ && IVBasedValues.count(Op.getOperand(1))
+ && L->isLoopInvariant(Op.getOperand(0)))
+ return Op.getOperand(0);
+
+ return NULL;
+}
+
+/// IVisLE - If Op is comparing IV based value with an loop invariant and
+/// IV based value is less than or equal to the loop invariant then
+/// return the loop invariant. Otherwise return NULL.
+Value * LoopIndexSplit::IVisLE(ICmpInst &Op) {
+ ICmpInst::Predicate P = Op.getPredicate();
+ if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE)
+ && IVBasedValues.count(Op.getOperand(0))
+ && L->isLoopInvariant(Op.getOperand(1)))
+ return Op.getOperand(1);
+
+ if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE)
+ && IVBasedValues.count(Op.getOperand(1))
+ && L->isLoopInvariant(Op.getOperand(0)))
+ return Op.getOperand(0);
+
+ return NULL;
+}
+
+/// IVisGT - If Op is comparing IV based value with an loop invariant and
+/// IV based value is greater than the loop invariant then return the loop
+/// invariant. Otherwise return NULL.
+Value * LoopIndexSplit::IVisGT(ICmpInst &Op) {
+ ICmpInst::Predicate P = Op.getPredicate();
+ if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT)
+ && IVBasedValues.count(Op.getOperand(0))
+ && L->isLoopInvariant(Op.getOperand(1)))
+ return Op.getOperand(1);
+
+ if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT)
+ && IVBasedValues.count(Op.getOperand(1))
+ && L->isLoopInvariant(Op.getOperand(0)))
+ return Op.getOperand(0);
+
+ return NULL;
+}
+
+/// IVisGE - If Op is comparing IV based value with an loop invariant and
+/// IV based value is greater than or equal to the loop invariant then
+/// return the loop invariant. Otherwise return NULL.
+Value * LoopIndexSplit::IVisGE(ICmpInst &Op) {
+ ICmpInst::Predicate P = Op.getPredicate();
+ if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE)
+ && IVBasedValues.count(Op.getOperand(0))
+ && L->isLoopInvariant(Op.getOperand(1)))
+ return Op.getOperand(1);
+
+ if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE)
+ && IVBasedValues.count(Op.getOperand(1))
+ && L->isLoopInvariant(Op.getOperand(0)))
+ return Op.getOperand(0);
+
+ return NULL;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
new file mode 100644
index 0000000..65acc1d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -0,0 +1,423 @@
+//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Loop Rotation Pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-rotate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+#define MAX_HEADER_SIZE 16
+
+STATISTIC(NumRotated, "Number of loops rotated");
+namespace {
+
+ class LoopRotate : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopRotate() : LoopPass(ID) {}
+
+ // Rotate Loop L as many times as possible. Return true if
+ // loop is rotated at least once.
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ // LCSSA form makes instruction renaming easier.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<ScalarEvolution>();
+ }
+
+ // Helper functions
+
+ /// Do actual work
+ bool rotateLoop(Loop *L, LPPassManager &LPM);
+
+ /// Initialize local data
+ void initialize();
+
+ /// After loop rotation, loop pre-header has multiple sucessors.
+ /// Insert one forwarding basic block to ensure that loop pre-header
+ /// has only one successor.
+ void preserveCanonicalLoopForm(LPPassManager &LPM);
+
+ private:
+ Loop *L;
+ BasicBlock *OrigHeader;
+ BasicBlock *OrigPreHeader;
+ BasicBlock *OrigLatch;
+ BasicBlock *NewHeader;
+ BasicBlock *Exit;
+ LPPassManager *LPM_Ptr;
+ };
+}
+
+char LoopRotate::ID = 0;
+INITIALIZE_PASS(LoopRotate, "loop-rotate", "Rotate Loops", false, false);
+
+Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
+
+/// Rotate Loop L as many times as possible. Return true if
+/// the loop is rotated at least once.
+bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) {
+
+ bool RotatedOneLoop = false;
+ initialize();
+ LPM_Ptr = &LPM;
+
+ // One loop can be rotated multiple times.
+ while (rotateLoop(Lp,LPM)) {
+ RotatedOneLoop = true;
+ initialize();
+ }
+
+ return RotatedOneLoop;
+}
+
+/// Rotate loop LP. Return true if the loop is rotated.
+bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
+ L = Lp;
+
+ OrigPreHeader = L->getLoopPreheader();
+ if (!OrigPreHeader) return false;
+
+ OrigLatch = L->getLoopLatch();
+ if (!OrigLatch) return false;
+
+ OrigHeader = L->getHeader();
+
+ // If the loop has only one block then there is not much to rotate.
+ if (L->getBlocks().size() == 1)
+ return false;
+
+ // If the loop header is not one of the loop exiting blocks then
+ // either this loop is already rotated or it is not
+ // suitable for loop rotation transformations.
+ if (!L->isLoopExiting(OrigHeader))
+ return false;
+
+ BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+ if (!BI)
+ return false;
+ assert(BI->isConditional() && "Branch Instruction is not conditional");
+
+ // Updating PHInodes in loops with multiple exits adds complexity.
+ // Keep it simple, and restrict loop rotation to loops with one exit only.
+ // In future, lift this restriction and support for multiple exits if
+ // required.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() > 1)
+ return false;
+
+ // Check size of original header and reject
+ // loop if it is very big.
+ unsigned Size = 0;
+
+ // FIXME: Use common api to estimate size.
+ for (BasicBlock::const_iterator OI = OrigHeader->begin(),
+ OE = OrigHeader->end(); OI != OE; ++OI) {
+ if (isa<PHINode>(OI))
+ continue; // PHI nodes don't count.
+ if (isa<DbgInfoIntrinsic>(OI))
+ continue; // Debug intrinsics don't count as size.
+ ++Size;
+ }
+
+ if (Size > MAX_HEADER_SIZE)
+ return false;
+
+ // Now, this loop is suitable for rotation.
+
+ // Anything ScalarEvolution may know about this loop or the PHI nodes
+ // in its header will soon be invalidated.
+ if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
+ // Find new Loop header. NewHeader is a Header's one and only successor
+ // that is inside loop. Header's other successor is outside the
+ // loop. Otherwise loop is not suitable for rotation.
+ Exit = BI->getSuccessor(0);
+ NewHeader = BI->getSuccessor(1);
+ if (L->contains(Exit))
+ std::swap(Exit, NewHeader);
+ assert(NewHeader && "Unable to determine new loop header");
+ assert(L->contains(NewHeader) && !L->contains(Exit) &&
+ "Unable to determine loop header and exit blocks");
+
+ // This code assumes that the new header has exactly one predecessor.
+ // Remove any single-entry PHI nodes in it.
+ assert(NewHeader->getSinglePredecessor() &&
+ "New header doesn't have one pred!");
+ FoldSingleEntryPHINodes(NewHeader);
+
+ // Begin by walking OrigHeader and populating ValueMap with an entry for
+ // each Instruction.
+ BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
+ DenseMap<const Value *, Value *> ValueMap;
+
+ // For PHI nodes, the value available in OldPreHeader is just the
+ // incoming value from OldPreHeader.
+ for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
+
+ // For the rest of the instructions, create a clone in the OldPreHeader.
+ TerminatorInst *LoopEntryBranch = OrigPreHeader->getTerminator();
+ for (; I != E; ++I) {
+ Instruction *C = I->clone();
+ C->setName(I->getName());
+ C->insertBefore(LoopEntryBranch);
+ ValueMap[I] = C;
+ }
+
+ // Along with all the other instructions, we just cloned OrigHeader's
+ // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
+ // successors by duplicating their incoming values for OrigHeader.
+ TerminatorInst *TI = OrigHeader->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreHeader);
+
+ // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
+ // OrigPreHeader's old terminator (the original branch into the loop), and
+ // remove the corresponding incoming values from the PHI nodes in OrigHeader.
+ LoopEntryBranch->eraseFromParent();
+ for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
+
+ // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
+ // as necessary.
+ SSAUpdater SSA;
+ for (I = OrigHeader->begin(); I != E; ++I) {
+ Value *OrigHeaderVal = I;
+ Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
+
+ // The value now exits in two versions: the initial value in the preheader
+ // and the loop "next" value in the original header.
+ SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
+ SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
+ SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal);
+
+ // Visit each use of the OrigHeader instruction.
+ for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
+ UE = OrigHeaderVal->use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+
+ // SSAUpdater can't handle a non-PHI use in the same block as an
+ // earlier def. We can easily handle those cases manually.
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ if (!isa<PHINode>(UserInst)) {
+ BasicBlock *UserBB = UserInst->getParent();
+
+ // The original users in the OrigHeader are already using the
+ // original definitions.
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped.
+ if (UserBB == OrigPreHeader) {
+ U = OrigPreHeaderVal;
+ continue;
+ }
+ }
+
+ // Anything else can be handled by SSAUpdater.
+ SSA.RewriteUse(U);
+ }
+ }
+
+ // NewHeader is now the header of the loop.
+ L->moveToHeader(NewHeader);
+
+ // Move the original header to the bottom of the loop, where it now more
+ // naturally belongs. This isn't necessary for correctness, and CodeGen can
+ // usually reorder blocks on its own to fix things like this up, but it's
+ // still nice to keep the IR readable.
+ //
+ // The original header should have only one predecessor at this point, since
+ // we checked that the loop had a proper preheader and unique backedge before
+ // we started.
+ assert(OrigHeader->getSinglePredecessor() &&
+ "Original loop header has too many predecessors after loop rotation!");
+ OrigHeader->moveAfter(OrigHeader->getSinglePredecessor());
+
+ // Also, since this original header only has one predecessor, zap its
+ // PHI nodes, which are now trivial.
+ FoldSingleEntryPHINodes(OrigHeader);
+
+ // TODO: We could just go ahead and merge OrigHeader into its predecessor
+ // at this point, if we don't mind updating dominator info.
+
+ // Establish a new preheader, update dominators, etc.
+ preserveCanonicalLoopForm(LPM);
+
+ ++NumRotated;
+ return true;
+}
+
+/// Initialize local data
+void LoopRotate::initialize() {
+ L = NULL;
+ OrigHeader = NULL;
+ OrigPreHeader = NULL;
+ NewHeader = NULL;
+ Exit = NULL;
+}
+
+/// After loop rotation, loop pre-header has multiple sucessors.
+/// Insert one forwarding basic block to ensure that loop pre-header
+/// has only one successor.
+void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
+
+ // Right now original pre-header has two successors, new header and
+ // exit block. Insert new block between original pre-header and
+ // new header such that loop's new pre-header has only one successor.
+ BasicBlock *NewPreHeader = BasicBlock::Create(OrigHeader->getContext(),
+ "bb.nph",
+ OrigHeader->getParent(),
+ NewHeader);
+ LoopInfo &LI = getAnalysis<LoopInfo>();
+ if (Loop *PL = LI.getLoopFor(OrigPreHeader))
+ PL->addBasicBlockToLoop(NewPreHeader, LI.getBase());
+ BranchInst::Create(NewHeader, NewPreHeader);
+
+ BranchInst *OrigPH_BI = cast<BranchInst>(OrigPreHeader->getTerminator());
+ if (OrigPH_BI->getSuccessor(0) == NewHeader)
+ OrigPH_BI->setSuccessor(0, NewPreHeader);
+ else {
+ assert(OrigPH_BI->getSuccessor(1) == NewHeader &&
+ "Unexpected original pre-header terminator");
+ OrigPH_BI->setSuccessor(1, NewPreHeader);
+ }
+
+ PHINode *PN;
+ for (BasicBlock::iterator I = NewHeader->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I) {
+ int index = PN->getBasicBlockIndex(OrigPreHeader);
+ assert(index != -1 && "Expected incoming value from Original PreHeader");
+ PN->setIncomingBlock(index, NewPreHeader);
+ assert(PN->getBasicBlockIndex(OrigPreHeader) == -1 &&
+ "Expected only one incoming value from Original PreHeader");
+ }
+
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ DT->addNewBlock(NewPreHeader, OrigPreHeader);
+ DT->changeImmediateDominator(L->getHeader(), NewPreHeader);
+ DT->changeImmediateDominator(Exit, OrigPreHeader);
+ for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
+ BI != BE; ++BI) {
+ BasicBlock *B = *BI;
+ if (L->getHeader() != B) {
+ DomTreeNode *Node = DT->getNode(B);
+ if (Node && Node->getBlock() == OrigHeader)
+ DT->changeImmediateDominator(*BI, L->getHeader());
+ }
+ }
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
+ }
+
+ if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>()) {
+ // New Preheader's dominance frontier is Exit block.
+ DominanceFrontier::DomSetType NewPHSet;
+ NewPHSet.insert(Exit);
+ DF->addBasicBlock(NewPreHeader, NewPHSet);
+
+ // New Header's dominance frontier now includes itself and Exit block
+ DominanceFrontier::iterator HeadI = DF->find(L->getHeader());
+ if (HeadI != DF->end()) {
+ DominanceFrontier::DomSetType & HeaderSet = HeadI->second;
+ HeaderSet.clear();
+ HeaderSet.insert(L->getHeader());
+ HeaderSet.insert(Exit);
+ } else {
+ DominanceFrontier::DomSetType HeaderSet;
+ HeaderSet.insert(L->getHeader());
+ HeaderSet.insert(Exit);
+ DF->addBasicBlock(L->getHeader(), HeaderSet);
+ }
+
+ // Original header (new Loop Latch)'s dominance frontier is Exit.
+ DominanceFrontier::iterator LatchI = DF->find(L->getLoopLatch());
+ if (LatchI != DF->end()) {
+ DominanceFrontier::DomSetType &LatchSet = LatchI->second;
+ LatchSet = LatchI->second;
+ LatchSet.clear();
+ LatchSet.insert(Exit);
+ } else {
+ DominanceFrontier::DomSetType LatchSet;
+ LatchSet.insert(Exit);
+ DF->addBasicBlock(L->getHeader(), LatchSet);
+ }
+
+ // If a loop block dominates new loop latch then add to its frontiers
+ // new header and Exit and remove new latch (which is equal to original
+ // header).
+ BasicBlock *NewLatch = L->getLoopLatch();
+
+ assert(NewLatch == OrigHeader && "NewLatch is inequal to OrigHeader");
+
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
+ BI != BE; ++BI) {
+ BasicBlock *B = *BI;
+ if (DT->dominates(B, NewLatch)) {
+ DominanceFrontier::iterator BDFI = DF->find(B);
+ if (BDFI != DF->end()) {
+ DominanceFrontier::DomSetType &BSet = BDFI->second;
+ BSet.erase(NewLatch);
+ BSet.insert(L->getHeader());
+ BSet.insert(Exit);
+ } else {
+ DominanceFrontier::DomSetType BSet;
+ BSet.insert(L->getHeader());
+ BSet.insert(Exit);
+ DF->addBasicBlock(B, BSet);
+ }
+ }
+ }
+ }
+ }
+
+ // Preserve canonical loop form, which means Exit block should
+ // have only one predecessor.
+ SplitEdge(L->getLoopLatch(), Exit, this);
+
+ assert(NewHeader && L->getHeader() == NewHeader &&
+ "Invalid loop header after loop rotation");
+ assert(NewPreHeader && L->getLoopPreheader() == NewPreHeader &&
+ "Invalid loop preheader after loop rotation");
+ assert(L->getLoopLatch() &&
+ "Invalid loop latch after loop rotation");
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
new file mode 100644
index 0000000..e8dc5d3
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -0,0 +1,3833 @@
+//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation analyzes and transforms the induction variables (and
+// computations derived from them) into forms suitable for efficient execution
+// on the target.
+//
+// This pass performs a strength reduction on array references inside loops that
+// have as one or more of their components the loop induction variable, it
+// rewrites expressions to take advantage of scaled-index addressing modes
+// available on the target, and it performs a variety of other optimizations
+// related to loop induction variables.
+//
+// Terminology note: this code has a lot of handling for "post-increment" or
+// "post-inc" users. This is not talking about post-increment addressing modes;
+// it is instead talking about code like this:
+//
+// %i = phi [ 0, %entry ], [ %i.next, %latch ]
+// ...
+// %i.next = add %i, 1
+// %c = icmp eq %i.next, %n
+//
+// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
+// it's useful to think about these as the same register, with some uses using
+// the value of the register before the add and some using // it after. In this
+// example, the icmp is a post-increment user, since it uses %i.next, which is
+// the value of the induction variable after the increment. The other common
+// case of post-increment users is users outside the loop.
+//
+// TODO: More sophistication in the way Formulae are generated and filtered.
+//
+// TODO: Handle multiple loops at a time.
+//
+// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
+// instead of a GlobalValue?
+//
+// TODO: When truncation is free, truncate ICmp users' operands to make it a
+// smaller encoding (on x86 at least).
+//
+// TODO: When a negated register is used by an add (such as in a list of
+// multiple base registers, or as the increment expression in an addrec),
+// we may not actually need both reg and (-1 * reg) in registers; the
+// negation can be implemented by using a sub instead of an add. The
+// lack of support for taking this into consideration when making
+// register pressure decisions is partly worked around by the "Special"
+// use kind.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-reduce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+
+/// RegSortData - This class holds data which is used to order reuse candidates.
+class RegSortData {
+public:
+ /// UsedByIndices - This represents the set of LSRUse indices which reference
+ /// a particular register.
+ SmallBitVector UsedByIndices;
+
+ RegSortData() {}
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+void RegSortData::print(raw_ostream &OS) const {
+ OS << "[NumUses=" << UsedByIndices.count() << ']';
+}
+
+void RegSortData::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+namespace {
+
+/// RegUseTracker - Map register candidates to information about how they are
+/// used.
+class RegUseTracker {
+ typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
+
+ RegUsesTy RegUsesMap;
+ SmallVector<const SCEV *, 16> RegSequence;
+
+public:
+ void CountRegister(const SCEV *Reg, size_t LUIdx);
+ void DropRegister(const SCEV *Reg, size_t LUIdx);
+ void DropUse(size_t LUIdx);
+
+ bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
+
+ const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
+
+ void clear();
+
+ typedef SmallVectorImpl<const SCEV *>::iterator iterator;
+ typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
+ iterator begin() { return RegSequence.begin(); }
+ iterator end() { return RegSequence.end(); }
+ const_iterator begin() const { return RegSequence.begin(); }
+ const_iterator end() const { return RegSequence.end(); }
+};
+
+}
+
+void
+RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
+ std::pair<RegUsesTy::iterator, bool> Pair =
+ RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
+ RegSortData &RSD = Pair.first->second;
+ if (Pair.second)
+ RegSequence.push_back(Reg);
+ RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
+ RSD.UsedByIndices.set(LUIdx);
+}
+
+void
+RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
+ RegUsesTy::iterator It = RegUsesMap.find(Reg);
+ assert(It != RegUsesMap.end());
+ RegSortData &RSD = It->second;
+ assert(RSD.UsedByIndices.size() > LUIdx);
+ RSD.UsedByIndices.reset(LUIdx);
+}
+
+void
+RegUseTracker::DropUse(size_t LUIdx) {
+ // Remove the use index from every register's use list.
+ for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
+ I != E; ++I)
+ I->second.UsedByIndices.reset(LUIdx);
+}
+
+bool
+RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
+ RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+ if (I == RegUsesMap.end())
+ return false;
+ const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
+ int i = UsedByIndices.find_first();
+ if (i == -1) return false;
+ if ((size_t)i != LUIdx) return true;
+ return UsedByIndices.find_next(i) != -1;
+}
+
+const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
+ RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+ assert(I != RegUsesMap.end() && "Unknown register!");
+ return I->second.UsedByIndices;
+}
+
+void RegUseTracker::clear() {
+ RegUsesMap.clear();
+ RegSequence.clear();
+}
+
+namespace {
+
+/// Formula - This class holds information that describes a formula for
+/// computing satisfying a use. It may include broken-out immediates and scaled
+/// registers.
+struct Formula {
+ /// AM - This is used to represent complex addressing, as well as other kinds
+ /// of interesting uses.
+ TargetLowering::AddrMode AM;
+
+ /// BaseRegs - The list of "base" registers for this use. When this is
+ /// non-empty, AM.HasBaseReg should be set to true.
+ SmallVector<const SCEV *, 2> BaseRegs;
+
+ /// ScaledReg - The 'scaled' register for this use. This should be non-null
+ /// when AM.Scale is not zero.
+ const SCEV *ScaledReg;
+
+ Formula() : ScaledReg(0) {}
+
+ void InitialMatch(const SCEV *S, Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT);
+
+ unsigned getNumRegs() const;
+ const Type *getType() const;
+
+ void DeleteBaseReg(const SCEV *&S);
+
+ bool referencesReg(const SCEV *S) const;
+ bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
+ const RegUseTracker &RegUses) const;
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+/// DoInitialMatch - Recursion helper for InitialMatch.
+static void DoInitialMatch(const SCEV *S, Loop *L,
+ SmallVectorImpl<const SCEV *> &Good,
+ SmallVectorImpl<const SCEV *> &Bad,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ // Collect expressions which properly dominate the loop header.
+ if (S->properlyDominates(L->getHeader(), &DT)) {
+ Good.push_back(S);
+ return;
+ }
+
+ // Look at add operands.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ DoInitialMatch(*I, L, Good, Bad, SE, DT);
+ return;
+ }
+
+ // Look at addrec operands.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ if (!AR->getStart()->isZero()) {
+ DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT);
+ DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
+ AR->getStepRecurrence(SE),
+ AR->getLoop()),
+ L, Good, Bad, SE, DT);
+ return;
+ }
+
+ // Handle a multiplication by -1 (negation) if it didn't fold.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
+ if (Mul->getOperand(0)->isAllOnesValue()) {
+ SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
+ const SCEV *NewMul = SE.getMulExpr(Ops);
+
+ SmallVector<const SCEV *, 4> MyGood;
+ SmallVector<const SCEV *, 4> MyBad;
+ DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT);
+ const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
+ SE.getEffectiveSCEVType(NewMul->getType())));
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
+ E = MyGood.end(); I != E; ++I)
+ Good.push_back(SE.getMulExpr(NegOne, *I));
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
+ E = MyBad.end(); I != E; ++I)
+ Bad.push_back(SE.getMulExpr(NegOne, *I));
+ return;
+ }
+
+ // Ok, we can't do anything interesting. Just stuff the whole thing into a
+ // register and hope for the best.
+ Bad.push_back(S);
+}
+
+/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
+/// attempting to keep all loop-invariant and loop-computable values in a
+/// single base register.
+void Formula::InitialMatch(const SCEV *S, Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ SmallVector<const SCEV *, 4> Good;
+ SmallVector<const SCEV *, 4> Bad;
+ DoInitialMatch(S, L, Good, Bad, SE, DT);
+ if (!Good.empty()) {
+ const SCEV *Sum = SE.getAddExpr(Good);
+ if (!Sum->isZero())
+ BaseRegs.push_back(Sum);
+ AM.HasBaseReg = true;
+ }
+ if (!Bad.empty()) {
+ const SCEV *Sum = SE.getAddExpr(Bad);
+ if (!Sum->isZero())
+ BaseRegs.push_back(Sum);
+ AM.HasBaseReg = true;
+ }
+}
+
+/// getNumRegs - Return the total number of register operands used by this
+/// formula. This does not include register uses implied by non-constant
+/// addrec strides.
+unsigned Formula::getNumRegs() const {
+ return !!ScaledReg + BaseRegs.size();
+}
+
+/// getType - Return the type of this formula, if it has one, or null
+/// otherwise. This type is meaningless except for the bit size.
+const Type *Formula::getType() const {
+ return !BaseRegs.empty() ? BaseRegs.front()->getType() :
+ ScaledReg ? ScaledReg->getType() :
+ AM.BaseGV ? AM.BaseGV->getType() :
+ 0;
+}
+
+/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
+void Formula::DeleteBaseReg(const SCEV *&S) {
+ if (&S != &BaseRegs.back())
+ std::swap(S, BaseRegs.back());
+ BaseRegs.pop_back();
+}
+
+/// referencesReg - Test if this formula references the given register.
+bool Formula::referencesReg(const SCEV *S) const {
+ return S == ScaledReg ||
+ std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
+}
+
+/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers
+/// which are used by uses other than the use with the given index.
+bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
+ const RegUseTracker &RegUses) const {
+ if (ScaledReg)
+ if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
+ return true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
+ E = BaseRegs.end(); I != E; ++I)
+ if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx))
+ return true;
+ return false;
+}
+
+void Formula::print(raw_ostream &OS) const {
+ bool First = true;
+ if (AM.BaseGV) {
+ if (!First) OS << " + "; else First = false;
+ WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false);
+ }
+ if (AM.BaseOffs != 0) {
+ if (!First) OS << " + "; else First = false;
+ OS << AM.BaseOffs;
+ }
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
+ E = BaseRegs.end(); I != E; ++I) {
+ if (!First) OS << " + "; else First = false;
+ OS << "reg(" << **I << ')';
+ }
+ if (AM.HasBaseReg && BaseRegs.empty()) {
+ if (!First) OS << " + "; else First = false;
+ OS << "**error: HasBaseReg**";
+ } else if (!AM.HasBaseReg && !BaseRegs.empty()) {
+ if (!First) OS << " + "; else First = false;
+ OS << "**error: !HasBaseReg**";
+ }
+ if (AM.Scale != 0) {
+ if (!First) OS << " + "; else First = false;
+ OS << AM.Scale << "*reg(";
+ if (ScaledReg)
+ OS << *ScaledReg;
+ else
+ OS << "<unknown>";
+ OS << ')';
+ }
+}
+
+void Formula::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+/// isAddRecSExtable - Return true if the given addrec can be sign-extended
+/// without changing its value.
+static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
+ return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
+}
+
+/// isAddSExtable - Return true if the given add can be sign-extended
+/// without changing its value.
+static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
+ return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
+}
+
+/// isMulSExtable - Return true if the given mul can be sign-extended
+/// without changing its value.
+static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
+ return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
+}
+
+/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
+/// and if the remainder is known to be zero, or null otherwise. If
+/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
+/// to Y, ignoring that the multiplication may overflow, which is useful when
+/// the result will be used in a context where the most significant bits are
+/// ignored.
+static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
+ ScalarEvolution &SE,
+ bool IgnoreSignificantBits = false) {
+ // Handle the trivial case, which works for any SCEV type.
+ if (LHS == RHS)
+ return SE.getConstant(LHS->getType(), 1);
+
+ // Handle a few RHS special cases.
+ const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
+ if (RC) {
+ const APInt &RA = RC->getValue()->getValue();
+ // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
+ // some folding.
+ if (RA.isAllOnesValue())
+ return SE.getMulExpr(LHS, RC);
+ // Handle x /s 1 as x.
+ if (RA == 1)
+ return LHS;
+ }
+
+ // Check for a division of a constant by a constant.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
+ if (!RC)
+ return 0;
+ const APInt &LA = C->getValue()->getValue();
+ const APInt &RA = RC->getValue()->getValue();
+ if (LA.srem(RA) != 0)
+ return 0;
+ return SE.getConstant(LA.sdiv(RA));
+ }
+
+ // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
+ if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
+ const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Step) return 0;
+ const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Start) return 0;
+ return SE.getAddRecExpr(Start, Step, AR->getLoop());
+ }
+ return 0;
+ }
+
+ // Distribute the sdiv over add operands, if the add doesn't overflow.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
+ if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
+ SmallVector<const SCEV *, 8> Ops;
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ const SCEV *Op = getExactSDiv(*I, RHS, SE,
+ IgnoreSignificantBits);
+ if (!Op) return 0;
+ Ops.push_back(Op);
+ }
+ return SE.getAddExpr(Ops);
+ }
+ return 0;
+ }
+
+ // Check for a multiply operand that we can pull RHS out of.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
+ if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
+ SmallVector<const SCEV *, 4> Ops;
+ bool Found = false;
+ for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
+ I != E; ++I) {
+ const SCEV *S = *I;
+ if (!Found)
+ if (const SCEV *Q = getExactSDiv(S, RHS, SE,
+ IgnoreSignificantBits)) {
+ S = Q;
+ Found = true;
+ }
+ Ops.push_back(S);
+ }
+ return Found ? SE.getMulExpr(Ops) : 0;
+ }
+ return 0;
+ }
+
+ // Otherwise we don't know.
+ return 0;
+}
+
+/// ExtractImmediate - If S involves the addition of a constant integer value,
+/// return that integer value, and mutate S to point to a new SCEV with that
+/// value excluded.
+static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ if (C->getValue()->getValue().getMinSignedBits() <= 64) {
+ S = SE.getConstant(C->getType(), 0);
+ return C->getValue()->getSExtValue();
+ }
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
+ int64_t Result = ExtractImmediate(NewOps.front(), SE);
+ if (Result != 0)
+ S = SE.getAddExpr(NewOps);
+ return Result;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
+ int64_t Result = ExtractImmediate(NewOps.front(), SE);
+ if (Result != 0)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ return Result;
+ }
+ return 0;
+}
+
+/// ExtractSymbol - If S involves the addition of a GlobalValue address,
+/// return that symbol, and mutate S to point to a new SCEV with that
+/// value excluded.
+static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
+ S = SE.getConstant(GV->getType(), 0);
+ return GV;
+ }
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
+ GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
+ if (Result)
+ S = SE.getAddExpr(NewOps);
+ return Result;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
+ GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
+ if (Result)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ return Result;
+ }
+ return 0;
+}
+
+/// isAddressUse - Returns true if the specified instruction is using the
+/// specified value as an address.
+static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
+ bool isAddress = isa<LoadInst>(Inst);
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->getOperand(1) == OperandVal)
+ isAddress = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Addressing modes can also be folded into prefetches and a variety
+ // of intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::prefetch:
+ case Intrinsic::x86_sse2_loadu_dq:
+ case Intrinsic::x86_sse2_loadu_pd:
+ case Intrinsic::x86_sse_loadu_ps:
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ if (II->getArgOperand(0) == OperandVal)
+ isAddress = true;
+ break;
+ }
+ }
+ return isAddress;
+}
+
+/// getAccessType - Return the type of the memory being accessed.
+static const Type *getAccessType(const Instruction *Inst) {
+ const Type *AccessTy = Inst->getType();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ AccessTy = SI->getOperand(0)->getType();
+ else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Addressing modes can also be folded into prefetches and a variety
+ // of intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ AccessTy = II->getArgOperand(0)->getType();
+ break;
+ }
+ }
+
+ // All pointers have the same requirements, so canonicalize them to an
+ // arbitrary pointer type to minimize variation.
+ if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy))
+ AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
+ PTy->getAddressSpace());
+
+ return AccessTy;
+}
+
+/// DeleteTriviallyDeadInstructions - If any of the instructions is the
+/// specified set are trivially dead, delete them and see if this makes any of
+/// their operands subsequently dead.
+static bool
+DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
+ bool Changed = false;
+
+ while (!DeadInsts.empty()) {
+ Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
+
+ if (I == 0 || !isInstructionTriviallyDead(I))
+ continue;
+
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ *OI = 0;
+ if (U->use_empty())
+ DeadInsts.push_back(U);
+ }
+
+ I->eraseFromParent();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+namespace {
+
+/// Cost - This class is used to measure and compare candidate formulae.
+class Cost {
+ /// TODO: Some of these could be merged. Also, a lexical ordering
+ /// isn't always optimal.
+ unsigned NumRegs;
+ unsigned AddRecCost;
+ unsigned NumIVMuls;
+ unsigned NumBaseAdds;
+ unsigned ImmCost;
+ unsigned SetupCost;
+
+public:
+ Cost()
+ : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
+ SetupCost(0) {}
+
+ unsigned getNumRegs() const { return NumRegs; }
+
+ bool operator<(const Cost &Other) const;
+
+ void Loose();
+
+ void RateFormula(const Formula &F,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const DenseSet<const SCEV *> &VisitedRegs,
+ const Loop *L,
+ const SmallVectorImpl<int64_t> &Offsets,
+ ScalarEvolution &SE, DominatorTree &DT);
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+
+private:
+ void RateRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT);
+ void RatePrimaryRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT);
+};
+
+}
+
+/// RateRegister - Tally up interesting quantities from the given register.
+void Cost::RateRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
+ if (AR->getLoop() == L)
+ AddRecCost += 1; /// TODO: This should be a function of the stride.
+
+ // If this is an addrec for a loop that's already been visited by LSR,
+ // don't second-guess its addrec phi nodes. LSR isn't currently smart
+ // enough to reason about more than one loop at a time. Consider these
+ // registers free and leave them alone.
+ else if (L->contains(AR->getLoop()) ||
+ (!AR->getLoop()->contains(L) &&
+ DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
+ for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(AR->getType())) &&
+ SE.getSCEV(PN) == AR)
+ return;
+
+ // If this isn't one of the addrecs that the loop already has, it
+ // would require a costly new phi and add. TODO: This isn't
+ // precisely modeled right now.
+ ++NumBaseAdds;
+ if (!Regs.count(AR->getStart()))
+ RateRegister(AR->getStart(), Regs, L, SE, DT);
+ }
+
+ // Add the step value register, if it needs one.
+ // TODO: The non-affine case isn't precisely modeled here.
+ if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
+ if (!Regs.count(AR->getStart()))
+ RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+ }
+ ++NumRegs;
+
+ // Rough heuristic; favor registers which don't require extra setup
+ // instructions in the preheader.
+ if (!isa<SCEVUnknown>(Reg) &&
+ !isa<SCEVConstant>(Reg) &&
+ !(isa<SCEVAddRecExpr>(Reg) &&
+ (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
+ isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
+ ++SetupCost;
+}
+
+/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
+/// before, rate it.
+void Cost::RatePrimaryRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ if (Regs.insert(Reg))
+ RateRegister(Reg, Regs, L, SE, DT);
+}
+
+void Cost::RateFormula(const Formula &F,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const DenseSet<const SCEV *> &VisitedRegs,
+ const Loop *L,
+ const SmallVectorImpl<int64_t> &Offsets,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ // Tally up the registers.
+ if (const SCEV *ScaledReg = F.ScaledReg) {
+ if (VisitedRegs.count(ScaledReg)) {
+ Loose();
+ return;
+ }
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ }
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I) {
+ const SCEV *BaseReg = *I;
+ if (VisitedRegs.count(BaseReg)) {
+ Loose();
+ return;
+ }
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+
+ NumIVMuls += isa<SCEVMulExpr>(BaseReg) &&
+ BaseReg->hasComputableLoopEvolution(L);
+ }
+
+ if (F.BaseRegs.size() > 1)
+ NumBaseAdds += F.BaseRegs.size() - 1;
+
+ // Tally up the non-zero immediates.
+ for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
+ E = Offsets.end(); I != E; ++I) {
+ int64_t Offset = (uint64_t)*I + F.AM.BaseOffs;
+ if (F.AM.BaseGV)
+ ImmCost += 64; // Handle symbolic values conservatively.
+ // TODO: This should probably be the pointer size.
+ else if (Offset != 0)
+ ImmCost += APInt(64, Offset, true).getMinSignedBits();
+ }
+}
+
+/// Loose - Set this cost to a loosing value.
+void Cost::Loose() {
+ NumRegs = ~0u;
+ AddRecCost = ~0u;
+ NumIVMuls = ~0u;
+ NumBaseAdds = ~0u;
+ ImmCost = ~0u;
+ SetupCost = ~0u;
+}
+
+/// operator< - Choose the lower cost.
+bool Cost::operator<(const Cost &Other) const {
+ if (NumRegs != Other.NumRegs)
+ return NumRegs < Other.NumRegs;
+ if (AddRecCost != Other.AddRecCost)
+ return AddRecCost < Other.AddRecCost;
+ if (NumIVMuls != Other.NumIVMuls)
+ return NumIVMuls < Other.NumIVMuls;
+ if (NumBaseAdds != Other.NumBaseAdds)
+ return NumBaseAdds < Other.NumBaseAdds;
+ if (ImmCost != Other.ImmCost)
+ return ImmCost < Other.ImmCost;
+ if (SetupCost != Other.SetupCost)
+ return SetupCost < Other.SetupCost;
+ return false;
+}
+
+void Cost::print(raw_ostream &OS) const {
+ OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
+ if (AddRecCost != 0)
+ OS << ", with addrec cost " << AddRecCost;
+ if (NumIVMuls != 0)
+ OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
+ if (NumBaseAdds != 0)
+ OS << ", plus " << NumBaseAdds << " base add"
+ << (NumBaseAdds == 1 ? "" : "s");
+ if (ImmCost != 0)
+ OS << ", plus " << ImmCost << " imm cost";
+ if (SetupCost != 0)
+ OS << ", plus " << SetupCost << " setup cost";
+}
+
+void Cost::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+namespace {
+
+/// LSRFixup - An operand value in an instruction which is to be replaced
+/// with some equivalent, possibly strength-reduced, replacement.
+struct LSRFixup {
+ /// UserInst - The instruction which will be updated.
+ Instruction *UserInst;
+
+ /// OperandValToReplace - The operand of the instruction which will
+ /// be replaced. The operand may be used more than once; every instance
+ /// will be replaced.
+ Value *OperandValToReplace;
+
+ /// PostIncLoops - If this user is to use the post-incremented value of an
+ /// induction variable, this variable is non-null and holds the loop
+ /// associated with the induction variable.
+ PostIncLoopSet PostIncLoops;
+
+ /// LUIdx - The index of the LSRUse describing the expression which
+ /// this fixup needs, minus an offset (below).
+ size_t LUIdx;
+
+ /// Offset - A constant offset to be added to the LSRUse expression.
+ /// This allows multiple fixups to share the same LSRUse with different
+ /// offsets, for example in an unrolled loop.
+ int64_t Offset;
+
+ bool isUseFullyOutsideLoop(const Loop *L) const;
+
+ LSRFixup();
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+LSRFixup::LSRFixup()
+ : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {}
+
+/// isUseFullyOutsideLoop - Test whether this fixup always uses its
+/// value outside of the given loop.
+bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
+ // PHI nodes use their value in their incoming blocks.
+ if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == OperandValToReplace &&
+ L->contains(PN->getIncomingBlock(i)))
+ return false;
+ return true;
+ }
+
+ return !L->contains(UserInst);
+}
+
+void LSRFixup::print(raw_ostream &OS) const {
+ OS << "UserInst=";
+ // Store is common and interesting enough to be worth special-casing.
+ if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
+ OS << "store ";
+ WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false);
+ } else if (UserInst->getType()->isVoidTy())
+ OS << UserInst->getOpcodeName();
+ else
+ WriteAsOperand(OS, UserInst, /*PrintType=*/false);
+
+ OS << ", OperandValToReplace=";
+ WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
+
+ for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
+ E = PostIncLoops.end(); I != E; ++I) {
+ OS << ", PostIncLoop=";
+ WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
+ }
+
+ if (LUIdx != ~size_t(0))
+ OS << ", LUIdx=" << LUIdx;
+
+ if (Offset != 0)
+ OS << ", Offset=" << Offset;
+}
+
+void LSRFixup::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+namespace {
+
+/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
+struct UniquifierDenseMapInfo {
+ static SmallVector<const SCEV *, 2> getEmptyKey() {
+ SmallVector<const SCEV *, 2> V;
+ V.push_back(reinterpret_cast<const SCEV *>(-1));
+ return V;
+ }
+
+ static SmallVector<const SCEV *, 2> getTombstoneKey() {
+ SmallVector<const SCEV *, 2> V;
+ V.push_back(reinterpret_cast<const SCEV *>(-2));
+ return V;
+ }
+
+ static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) {
+ unsigned Result = 0;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
+ E = V.end(); I != E; ++I)
+ Result ^= DenseMapInfo<const SCEV *>::getHashValue(*I);
+ return Result;
+ }
+
+ static bool isEqual(const SmallVector<const SCEV *, 2> &LHS,
+ const SmallVector<const SCEV *, 2> &RHS) {
+ return LHS == RHS;
+ }
+};
+
+/// LSRUse - This class holds the state that LSR keeps for each use in
+/// IVUsers, as well as uses invented by LSR itself. It includes information
+/// about what kinds of things can be folded into the user, information about
+/// the user itself, and information about how the use may be satisfied.
+/// TODO: Represent multiple users of the same expression in common?
+class LSRUse {
+ DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier;
+
+public:
+ /// KindType - An enum for a kind of use, indicating what types of
+ /// scaled and immediate operands it might support.
+ enum KindType {
+ Basic, ///< A normal use, with no folding.
+ Special, ///< A special case of basic, allowing -1 scales.
+ Address, ///< An address use; folding according to TargetLowering
+ ICmpZero ///< An equality icmp with both operands folded into one.
+ // TODO: Add a generic icmp too?
+ };
+
+ KindType Kind;
+ const Type *AccessTy;
+
+ SmallVector<int64_t, 8> Offsets;
+ int64_t MinOffset;
+ int64_t MaxOffset;
+
+ /// AllFixupsOutsideLoop - This records whether all of the fixups using this
+ /// LSRUse are outside of the loop, in which case some special-case heuristics
+ /// may be used.
+ bool AllFixupsOutsideLoop;
+
+ /// WidestFixupType - This records the widest use type for any fixup using
+ /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
+ /// max fixup widths to be equivalent, because the narrower one may be relying
+ /// on the implicit truncation to truncate away bogus bits.
+ const Type *WidestFixupType;
+
+ /// Formulae - A list of ways to build a value that can satisfy this user.
+ /// After the list is populated, one of these is selected heuristically and
+ /// used to formulate a replacement for OperandValToReplace in UserInst.
+ SmallVector<Formula, 12> Formulae;
+
+ /// Regs - The set of register candidates used by all formulae in this LSRUse.
+ SmallPtrSet<const SCEV *, 4> Regs;
+
+ LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),
+ MinOffset(INT64_MAX),
+ MaxOffset(INT64_MIN),
+ AllFixupsOutsideLoop(true),
+ WidestFixupType(0) {}
+
+ bool HasFormulaWithSameRegs(const Formula &F) const;
+ bool InsertFormula(const Formula &F);
+ void DeleteFormula(Formula &F);
+ void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+/// HasFormula - Test whether this use as a formula which has the same
+/// registers as the given formula.
+bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
+ SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ if (F.ScaledReg) Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for uniquifying.
+ std::sort(Key.begin(), Key.end());
+ return Uniquifier.count(Key);
+}
+
+/// InsertFormula - If the given formula has not yet been inserted, add it to
+/// the list, and return true. Return false otherwise.
+bool LSRUse::InsertFormula(const Formula &F) {
+ SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ if (F.ScaledReg) Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ if (!Uniquifier.insert(Key).second)
+ return false;
+
+ // Using a register to hold the value of 0 is not profitable.
+ assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
+ "Zero allocated in a scaled register!");
+#ifndef NDEBUG
+ for (SmallVectorImpl<const SCEV *>::const_iterator I =
+ F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
+ assert(!(*I)->isZero() && "Zero allocated in a base register!");
+#endif
+
+ // Add the formula to the list.
+ Formulae.push_back(F);
+
+ // Record registers now being used by this use.
+ if (F.ScaledReg) Regs.insert(F.ScaledReg);
+ Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+
+ return true;
+}
+
+/// DeleteFormula - Remove the given formula from this use's list.
+void LSRUse::DeleteFormula(Formula &F) {
+ if (&F != &Formulae.back())
+ std::swap(F, Formulae.back());
+ Formulae.pop_back();
+ assert(!Formulae.empty() && "LSRUse has no formulae left!");
+}
+
+/// RecomputeRegs - Recompute the Regs field, and update RegUses.
+void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
+ // Now that we've filtered out some formulae, recompute the Regs set.
+ SmallPtrSet<const SCEV *, 4> OldRegs = Regs;
+ Regs.clear();
+ for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(),
+ E = Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.ScaledReg) Regs.insert(F.ScaledReg);
+ Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+ }
+
+ // Update the RegTracker.
+ for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(),
+ E = OldRegs.end(); I != E; ++I)
+ if (!Regs.count(*I))
+ RegUses.DropRegister(*I, LUIdx);
+}
+
+void LSRUse::print(raw_ostream &OS) const {
+ OS << "LSR Use: Kind=";
+ switch (Kind) {
+ case Basic: OS << "Basic"; break;
+ case Special: OS << "Special"; break;
+ case ICmpZero: OS << "ICmpZero"; break;
+ case Address:
+ OS << "Address of ";
+ if (AccessTy->isPointerTy())
+ OS << "pointer"; // the full pointer type could be really verbose
+ else
+ OS << *AccessTy;
+ }
+
+ OS << ", Offsets={";
+ for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
+ E = Offsets.end(); I != E; ++I) {
+ OS << *I;
+ if (llvm::next(I) != E)
+ OS << ',';
+ }
+ OS << '}';
+
+ if (AllFixupsOutsideLoop)
+ OS << ", all-fixups-outside-loop";
+
+ if (WidestFixupType)
+ OS << ", widest fixup type: " << *WidestFixupType;
+}
+
+void LSRUse::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
+/// be completely folded into the user instruction at isel time. This includes
+/// address-mode folding and special icmp tricks.
+static bool isLegalUse(const TargetLowering::AddrMode &AM,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI) {
+ switch (Kind) {
+ case LSRUse::Address:
+ // If we have low-level target information, ask the target if it can
+ // completely fold this address.
+ if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
+
+ // Otherwise, just guess that reg+reg addressing is legal.
+ return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
+
+ case LSRUse::ICmpZero:
+ // There's not even a target hook for querying whether it would be legal to
+ // fold a GV into an ICmp.
+ if (AM.BaseGV)
+ return false;
+
+ // ICmp only has two operands; don't allow more than two non-trivial parts.
+ if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
+ return false;
+
+ // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
+ // putting the scaled register in the other operand of the icmp.
+ if (AM.Scale != 0 && AM.Scale != -1)
+ return false;
+
+ // If we have low-level target information, ask the target if it can fold an
+ // integer immediate on an icmp.
+ if (AM.BaseOffs != 0) {
+ if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs);
+ return false;
+ }
+
+ return true;
+
+ case LSRUse::Basic:
+ // Only handle single-register values.
+ return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
+
+ case LSRUse::Special:
+ // Only handle -1 scales, or no scale.
+ return AM.Scale == 0 || AM.Scale == -1;
+ }
+
+ return false;
+}
+
+static bool isLegalUse(TargetLowering::AddrMode AM,
+ int64_t MinOffset, int64_t MaxOffset,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI) {
+ // Check for overflow.
+ if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
+ (MinOffset > 0))
+ return false;
+ AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
+ if (isLegalUse(AM, Kind, AccessTy, TLI)) {
+ AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
+ // Check for overflow.
+ if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
+ (MaxOffset > 0))
+ return false;
+ AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
+ return isLegalUse(AM, Kind, AccessTy, TLI);
+ }
+ return false;
+}
+
+static bool isAlwaysFoldable(int64_t BaseOffs,
+ GlobalValue *BaseGV,
+ bool HasBaseReg,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI) {
+ // Fast-path: zero is always foldable.
+ if (BaseOffs == 0 && !BaseGV) return true;
+
+ // Conservatively, create an address with an immediate and a
+ // base and a scale.
+ TargetLowering::AddrMode AM;
+ AM.BaseOffs = BaseOffs;
+ AM.BaseGV = BaseGV;
+ AM.HasBaseReg = HasBaseReg;
+ AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+
+ // Canonicalize a scale of 1 to a base register if the formula doesn't
+ // already have a base register.
+ if (!AM.HasBaseReg && AM.Scale == 1) {
+ AM.Scale = 0;
+ AM.HasBaseReg = true;
+ }
+
+ return isLegalUse(AM, Kind, AccessTy, TLI);
+}
+
+static bool isAlwaysFoldable(const SCEV *S,
+ int64_t MinOffset, int64_t MaxOffset,
+ bool HasBaseReg,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI,
+ ScalarEvolution &SE) {
+ // Fast-path: zero is always foldable.
+ if (S->isZero()) return true;
+
+ // Conservatively, create an address with an immediate and a
+ // base and a scale.
+ int64_t BaseOffs = ExtractImmediate(S, SE);
+ GlobalValue *BaseGV = ExtractSymbol(S, SE);
+
+ // If there's anything else involved, it's not foldable.
+ if (!S->isZero()) return false;
+
+ // Fast-path: zero is always foldable.
+ if (BaseOffs == 0 && !BaseGV) return true;
+
+ // Conservatively, create an address with an immediate and a
+ // base and a scale.
+ TargetLowering::AddrMode AM;
+ AM.BaseOffs = BaseOffs;
+ AM.BaseGV = BaseGV;
+ AM.HasBaseReg = HasBaseReg;
+ AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+
+ return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
+}
+
+namespace {
+
+/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
+struct UseMapDenseMapInfo {
+ static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
+ return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
+ }
+
+ static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
+ return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
+ }
+
+ static unsigned
+ getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
+ unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
+ Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
+ return Result;
+ }
+
+ static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
+ const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
+ return LHS == RHS;
+ }
+};
+
+/// FormulaSorter - This class implements an ordering for formulae which sorts
+/// the by their standalone cost.
+class FormulaSorter {
+ /// These two sets are kept empty, so that we compute standalone costs.
+ DenseSet<const SCEV *> VisitedRegs;
+ SmallPtrSet<const SCEV *, 16> Regs;
+ Loop *L;
+ LSRUse *LU;
+ ScalarEvolution &SE;
+ DominatorTree &DT;
+
+public:
+ FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt)
+ : L(l), LU(&lu), SE(se), DT(dt) {}
+
+ bool operator()(const Formula &A, const Formula &B) {
+ Cost CostA;
+ CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
+ Regs.clear();
+ Cost CostB;
+ CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
+ Regs.clear();
+ return CostA < CostB;
+ }
+};
+
+/// LSRInstance - This class holds state for the main loop strength reduction
+/// logic.
+class LSRInstance {
+ IVUsers &IU;
+ ScalarEvolution &SE;
+ DominatorTree &DT;
+ LoopInfo &LI;
+ const TargetLowering *const TLI;
+ Loop *const L;
+ bool Changed;
+
+ /// IVIncInsertPos - This is the insert position that the current loop's
+ /// induction variable increment should be placed. In simple loops, this is
+ /// the latch block's terminator. But in more complicated cases, this is a
+ /// position which will dominate all the in-loop post-increment users.
+ Instruction *IVIncInsertPos;
+
+ /// Factors - Interesting factors between use strides.
+ SmallSetVector<int64_t, 8> Factors;
+
+ /// Types - Interesting use types, to facilitate truncation reuse.
+ SmallSetVector<const Type *, 4> Types;
+
+ /// Fixups - The list of operands which are to be replaced.
+ SmallVector<LSRFixup, 16> Fixups;
+
+ /// Uses - The list of interesting uses.
+ SmallVector<LSRUse, 16> Uses;
+
+ /// RegUses - Track which uses use which register candidates.
+ RegUseTracker RegUses;
+
+ void OptimizeShadowIV();
+ bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
+ ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
+ void OptimizeLoopTermCond();
+
+ void CollectInterestingTypesAndFactors();
+ void CollectFixupsAndInitialFormulae();
+
+ LSRFixup &getNewFixup() {
+ Fixups.push_back(LSRFixup());
+ return Fixups.back();
+ }
+
+ // Support for sharing of LSRUses between LSRFixups.
+ typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
+ size_t,
+ UseMapDenseMapInfo> UseMapTy;
+ UseMapTy UseMap;
+
+ bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
+ LSRUse::KindType Kind, const Type *AccessTy);
+
+ std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
+ LSRUse::KindType Kind,
+ const Type *AccessTy);
+
+ void DeleteUse(LSRUse &LU);
+
+ LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
+
+public:
+ void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
+ void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
+ void CountRegisters(const Formula &F, size_t LUIdx);
+ bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+
+ void CollectLoopInvariantFixupsAndFormulae();
+
+ void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
+ unsigned Depth = 0);
+ void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateCrossUseConstantOffsets();
+ void GenerateAllReuseFormulae();
+
+ void FilterOutUndesirableDedicatedRegisters();
+
+ size_t EstimateSearchSpaceComplexity() const;
+ void NarrowSearchSpaceByDetectingSupersets();
+ void NarrowSearchSpaceByCollapsingUnrolledCode();
+ void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ void NarrowSearchSpaceByPickingWinnerRegs();
+ void NarrowSearchSpaceUsingHeuristics();
+
+ void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
+ Cost &SolutionCost,
+ SmallVectorImpl<const Formula *> &Workspace,
+ const Cost &CurCost,
+ const SmallPtrSet<const SCEV *, 16> &CurRegs,
+ DenseSet<const SCEV *> &VisitedRegs) const;
+ void Solve(SmallVectorImpl<const Formula *> &Solution) const;
+
+ BasicBlock::iterator
+ HoistInsertPosition(BasicBlock::iterator IP,
+ const SmallVectorImpl<Instruction *> &Inputs) const;
+ BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+ const LSRFixup &LF,
+ const LSRUse &LU) const;
+
+ Value *Expand(const LSRFixup &LF,
+ const Formula &F,
+ BasicBlock::iterator IP,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) const;
+ void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const;
+ void Rewrite(const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const;
+ void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
+ Pass *P);
+
+ LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
+
+ bool getChanged() const { return Changed; }
+
+ void print_factors_and_types(raw_ostream &OS) const;
+ void print_fixups(raw_ostream &OS) const;
+ void print_uses(raw_ostream &OS) const;
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+/// OptimizeShadowIV - If IV is used in a int-to-float cast
+/// inside the loop then try to eliminate the cast operation.
+void LSRInstance::OptimizeShadowIV() {
+ const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ return;
+
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
+ UI != E; /* empty */) {
+ IVUsers::const_iterator CandidateUI = UI;
+ ++UI;
+ Instruction *ShadowUse = CandidateUI->getUser();
+ const Type *DestTy = NULL;
+
+ /* If shadow use is a int->float cast then insert a second IV
+ to eliminate this cast.
+
+ for (unsigned i = 0; i < n; ++i)
+ foo((double)i);
+
+ is transformed into
+
+ double d = 0.0;
+ for (unsigned i = 0; i < n; ++i, ++d)
+ foo(d);
+ */
+ if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
+ DestTy = UCast->getDestTy();
+ else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
+ DestTy = SCast->getDestTy();
+ if (!DestTy) continue;
+
+ if (TLI) {
+ // If target does not support DestTy natively then do not apply
+ // this transformation.
+ EVT DVT = TLI->getValueType(DestTy);
+ if (!TLI->isTypeLegal(DVT)) continue;
+ }
+
+ PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
+ if (!PH) continue;
+ if (PH->getNumIncomingValues() != 2) continue;
+
+ const Type *SrcTy = PH->getType();
+ int Mantissa = DestTy->getFPMantissaWidth();
+ if (Mantissa == -1) continue;
+ if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
+ continue;
+
+ unsigned Entry, Latch;
+ if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
+ Entry = 0;
+ Latch = 1;
+ } else {
+ Entry = 1;
+ Latch = 0;
+ }
+
+ ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
+ if (!Init) continue;
+ Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
+
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
+ if (!Incr) continue;
+ if (Incr->getOpcode() != Instruction::Add
+ && Incr->getOpcode() != Instruction::Sub)
+ continue;
+
+ /* Initialize new IV, double d = 0.0 in above example. */
+ ConstantInt *C = NULL;
+ if (Incr->getOperand(0) == PH)
+ C = dyn_cast<ConstantInt>(Incr->getOperand(1));
+ else if (Incr->getOperand(1) == PH)
+ C = dyn_cast<ConstantInt>(Incr->getOperand(0));
+ else
+ continue;
+
+ if (!C) continue;
+
+ // Ignore negative constants, as the code below doesn't handle them
+ // correctly. TODO: Remove this restriction.
+ if (!C->getValue().isStrictlyPositive()) continue;
+
+ /* Add new PHINode. */
+ PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
+
+ /* create new increment. '++d' in above example. */
+ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
+ BinaryOperator *NewIncr =
+ BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
+ Instruction::FAdd : Instruction::FSub,
+ NewPH, CFP, "IV.S.next.", Incr);
+
+ NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
+ NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
+
+ /* Remove cast operation */
+ ShadowUse->replaceAllUsesWith(NewPH);
+ ShadowUse->eraseFromParent();
+ Changed = true;
+ break;
+ }
+}
+
+/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
+/// set the IV user and stride information and return true, otherwise return
+/// false.
+bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
+ for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
+ if (UI->getUser() == Cond) {
+ // NOTE: we could handle setcc instructions with multiple uses here, but
+ // InstCombine does it as well for simple uses, it's not clear that it
+ // occurs enough in real life to handle.
+ CondUse = UI;
+ return true;
+ }
+ return false;
+}
+
+/// OptimizeMax - Rewrite the loop's terminating condition if it uses
+/// a max computation.
+///
+/// This is a narrow solution to a specific, but acute, problem. For loops
+/// like this:
+///
+/// i = 0;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i < n);
+///
+/// the trip count isn't just 'n', because 'n' might not be positive. And
+/// unfortunately this can come up even for loops where the user didn't use
+/// a C do-while loop. For example, seemingly well-behaved top-test loops
+/// will commonly be lowered like this:
+//
+/// if (n > 0) {
+/// i = 0;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i < n);
+/// }
+///
+/// and then it's possible for subsequent optimization to obscure the if
+/// test in such a way that indvars can't find it.
+///
+/// When indvars can't find the if test in loops like this, it creates a
+/// max expression, which allows it to give the loop a canonical
+/// induction variable:
+///
+/// i = 0;
+/// max = n < 1 ? 1 : n;
+/// do {
+/// p[i] = 0.0;
+/// } while (++i != max);
+///
+/// Canonical induction variables are necessary because the loop passes
+/// are designed around them. The most obvious example of this is the
+/// LoopInfo analysis, which doesn't remember trip count values. It
+/// expects to be able to rediscover the trip count each time it is
+/// needed, and it does this using a simple analysis that only succeeds if
+/// the loop has a canonical induction variable.
+///
+/// However, when it comes time to generate code, the maximum operation
+/// can be quite costly, especially if it's inside of an outer loop.
+///
+/// This function solves this problem by detecting this type of loop and
+/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
+/// the instructions for the maximum computation.
+///
+ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
+ // Check that the loop matches the pattern we're looking for.
+ if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
+ Cond->getPredicate() != CmpInst::ICMP_NE)
+ return Cond;
+
+ SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
+ if (!Sel || !Sel->hasOneUse()) return Cond;
+
+ const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ return Cond;
+ const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
+
+ // Add one to the backedge-taken count to get the trip count.
+ const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
+ if (IterationCount != SE.getSCEV(Sel)) return Cond;
+
+ // Check for a max calculation that matches the pattern. There's no check
+ // for ICMP_ULE here because the comparison would be with zero, which
+ // isn't interesting.
+ CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+ const SCEVNAryExpr *Max = 0;
+ if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
+ Pred = ICmpInst::ICMP_SLE;
+ Max = S;
+ } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
+ Pred = ICmpInst::ICMP_SLT;
+ Max = S;
+ } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
+ Pred = ICmpInst::ICMP_ULT;
+ Max = U;
+ } else {
+ // No match; bail.
+ return Cond;
+ }
+
+ // To handle a max with more than two operands, this optimization would
+ // require additional checking and setup.
+ if (Max->getNumOperands() != 2)
+ return Cond;
+
+ const SCEV *MaxLHS = Max->getOperand(0);
+ const SCEV *MaxRHS = Max->getOperand(1);
+
+ // ScalarEvolution canonicalizes constants to the left. For < and >, look
+ // for a comparison with 1. For <= and >=, a comparison with zero.
+ if (!MaxLHS ||
+ (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
+ return Cond;
+
+ // Check the relevant induction variable for conformance to
+ // the pattern.
+ const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine() ||
+ AR->getStart() != One ||
+ AR->getStepRecurrence(SE) != One)
+ return Cond;
+
+ assert(AR->getLoop() == L &&
+ "Loop condition operand is an addrec in a different loop!");
+
+ // Check the right operand of the select, and remember it, as it will
+ // be used in the new comparison instruction.
+ Value *NewRHS = 0;
+ if (ICmpInst::isTrueWhenEqual(Pred)) {
+ // Look for n+1, and grab n.
+ if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
+ if (isa<ConstantInt>(BO->getOperand(1)) &&
+ cast<ConstantInt>(BO->getOperand(1))->isOne() &&
+ SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
+ if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
+ if (isa<ConstantInt>(BO->getOperand(1)) &&
+ cast<ConstantInt>(BO->getOperand(1))->isOne() &&
+ SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
+ if (!NewRHS)
+ return Cond;
+ } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
+ NewRHS = Sel->getOperand(1);
+ else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
+ NewRHS = Sel->getOperand(2);
+ else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
+ NewRHS = SU->getValue();
+ else
+ // Max doesn't match expected pattern.
+ return Cond;
+
+ // Determine the new comparison opcode. It may be signed or unsigned,
+ // and the original comparison may be either equality or inequality.
+ if (Cond->getPredicate() == CmpInst::ICMP_EQ)
+ Pred = CmpInst::getInversePredicate(Pred);
+
+ // Ok, everything looks ok to change the condition into an SLT or SGE and
+ // delete the max calculation.
+ ICmpInst *NewCond =
+ new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
+
+ // Delete the max calculation instructions.
+ Cond->replaceAllUsesWith(NewCond);
+ CondUse->setUser(NewCond);
+ Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
+ Cond->eraseFromParent();
+ Sel->eraseFromParent();
+ if (Cmp->use_empty())
+ Cmp->eraseFromParent();
+ return NewCond;
+}
+
+/// OptimizeLoopTermCond - Change loop terminating condition to use the
+/// postinc iv when possible.
+void
+LSRInstance::OptimizeLoopTermCond() {
+ SmallPtrSet<Instruction *, 4> PostIncs;
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+
+ // Get the terminating condition for the loop if possible. If we
+ // can, we want to change it to use a post-incremented version of its
+ // induction variable, to allow coalescing the live ranges for the IV into
+ // one register value.
+
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!TermBr)
+ continue;
+ // FIXME: Overly conservative, termination condition could be an 'or' etc..
+ if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
+ continue;
+
+ // Search IVUsesByStride to find Cond's IVUse if there is one.
+ IVStrideUse *CondUse = 0;
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+ if (!FindIVUserForCond(Cond, CondUse))
+ continue;
+
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
+ // being unable to find a sufficient guard, for example), change the loop
+ // comparison to use SLT or ULT instead of NE.
+ // One consequence of doing this now is that it disrupts the count-down
+ // optimization. That's not always a bad thing though, because in such
+ // cases it may still be worthwhile to avoid a max.
+ Cond = OptimizeMax(Cond, CondUse);
+
+ // If this exiting block dominates the latch block, it may also use
+ // the post-inc value if it won't be shared with other uses.
+ // Check for dominance.
+ if (!DT.dominates(ExitingBlock, LatchBlock))
+ continue;
+
+ // Conservatively avoid trying to use the post-inc value in non-latch
+ // exits if there may be pre-inc users in intervening blocks.
+ if (LatchBlock != ExitingBlock)
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
+ // Test if the use is reachable from the exiting block. This dominator
+ // query is a conservative approximation of reachability.
+ if (&*UI != CondUse &&
+ !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
+ // Conservatively assume there may be reuse if the quotient of their
+ // strides could be a legal scale.
+ const SCEV *A = IU.getStride(*CondUse, L);
+ const SCEV *B = IU.getStride(*UI, L);
+ if (!A || !B) continue;
+ if (SE.getTypeSizeInBits(A->getType()) !=
+ SE.getTypeSizeInBits(B->getType())) {
+ if (SE.getTypeSizeInBits(A->getType()) >
+ SE.getTypeSizeInBits(B->getType()))
+ B = SE.getSignExtendExpr(B, A->getType());
+ else
+ A = SE.getSignExtendExpr(A, B->getType());
+ }
+ if (const SCEVConstant *D =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
+ const ConstantInt *C = D->getValue();
+ // Stride of one or negative one can have reuse with non-addresses.
+ if (C->isOne() || C->isAllOnesValue())
+ goto decline_post_inc;
+ // Avoid weird situations.
+ if (C->getValue().getMinSignedBits() >= 64 ||
+ C->getValue().isMinSignedValue())
+ goto decline_post_inc;
+ // Without TLI, assume that any stride might be valid, and so any
+ // use might be shared.
+ if (!TLI)
+ goto decline_post_inc;
+ // Check for possible scaled-address reuse.
+ const Type *AccessTy = getAccessType(UI->getUser());
+ TargetLowering::AddrMode AM;
+ AM.Scale = C->getSExtValue();
+ if (TLI->isLegalAddressingMode(AM, AccessTy))
+ goto decline_post_inc;
+ AM.Scale = -AM.Scale;
+ if (TLI->isLegalAddressingMode(AM, AccessTy))
+ goto decline_post_inc;
+ }
+ }
+
+ DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
+ << *Cond << '\n');
+
+ // It's possible for the setcc instruction to be anywhere in the loop, and
+ // possible for it to have multiple users. If it is not immediately before
+ // the exiting block branch, move it.
+ if (&*++BasicBlock::iterator(Cond) != TermBr) {
+ if (Cond->hasOneUse()) {
+ Cond->moveBefore(TermBr);
+ } else {
+ // Clone the terminating condition and insert into the loopend.
+ ICmpInst *OldCond = Cond;
+ Cond = cast<ICmpInst>(Cond->clone());
+ Cond->setName(L->getHeader()->getName() + ".termcond");
+ ExitingBlock->getInstList().insert(TermBr, Cond);
+
+ // Clone the IVUse, as the old use still exists!
+ CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
+ TermBr->replaceUsesOfWith(OldCond, Cond);
+ }
+ }
+
+ // If we get to here, we know that we can transform the setcc instruction to
+ // use the post-incremented version of the IV, allowing us to coalesce the
+ // live ranges for the IV correctly.
+ CondUse->transformToPostInc(L);
+ Changed = true;
+
+ PostIncs.insert(Cond);
+ decline_post_inc:;
+ }
+
+ // Determine an insertion point for the loop induction variable increment. It
+ // must dominate all the post-inc comparisons we just set up, and it must
+ // dominate the loop latch edge.
+ IVIncInsertPos = L->getLoopLatch()->getTerminator();
+ for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(),
+ E = PostIncs.end(); I != E; ++I) {
+ BasicBlock *BB =
+ DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
+ (*I)->getParent());
+ if (BB == (*I)->getParent())
+ IVIncInsertPos = *I;
+ else if (BB != IVIncInsertPos->getParent())
+ IVIncInsertPos = BB->getTerminator();
+ }
+}
+
+/// reconcileNewOffset - Determine if the given use can accomodate a fixup
+/// at the given offset and other details. If so, update the use and
+/// return true.
+bool
+LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
+ LSRUse::KindType Kind, const Type *AccessTy) {
+ int64_t NewMinOffset = LU.MinOffset;
+ int64_t NewMaxOffset = LU.MaxOffset;
+ const Type *NewAccessTy = AccessTy;
+
+ // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
+ // something conservative, however this can pessimize in the case that one of
+ // the uses will have all its uses outside the loop, for example.
+ if (LU.Kind != Kind)
+ return false;
+ // Conservatively assume HasBaseReg is true for now.
+ if (NewOffset < LU.MinOffset) {
+ if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
+ Kind, AccessTy, TLI))
+ return false;
+ NewMinOffset = NewOffset;
+ } else if (NewOffset > LU.MaxOffset) {
+ if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
+ Kind, AccessTy, TLI))
+ return false;
+ NewMaxOffset = NewOffset;
+ }
+ // Check for a mismatched access type, and fall back conservatively as needed.
+ // TODO: Be less conservative when the type is similar and can use the same
+ // addressing modes.
+ if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
+ NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+
+ // Update the use.
+ LU.MinOffset = NewMinOffset;
+ LU.MaxOffset = NewMaxOffset;
+ LU.AccessTy = NewAccessTy;
+ if (NewOffset != LU.Offsets.back())
+ LU.Offsets.push_back(NewOffset);
+ return true;
+}
+
+/// getUse - Return an LSRUse index and an offset value for a fixup which
+/// needs the given expression, with the given kind and optional access type.
+/// Either reuse an existing use or create a new one, as needed.
+std::pair<size_t, int64_t>
+LSRInstance::getUse(const SCEV *&Expr,
+ LSRUse::KindType Kind, const Type *AccessTy) {
+ const SCEV *Copy = Expr;
+ int64_t Offset = ExtractImmediate(Expr, SE);
+
+ // Basic uses can't accept any offset, for example.
+ if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
+ Expr = Copy;
+ Offset = 0;
+ }
+
+ std::pair<UseMapTy::iterator, bool> P =
+ UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
+ if (!P.second) {
+ // A use already existed with this base.
+ size_t LUIdx = P.first->second;
+ LSRUse &LU = Uses[LUIdx];
+ if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
+ // Reuse this use.
+ return std::make_pair(LUIdx, Offset);
+ }
+
+ // Create a new use.
+ size_t LUIdx = Uses.size();
+ P.first->second = LUIdx;
+ Uses.push_back(LSRUse(Kind, AccessTy));
+ LSRUse &LU = Uses[LUIdx];
+
+ // We don't need to track redundant offsets, but we don't need to go out
+ // of our way here to avoid them.
+ if (LU.Offsets.empty() || Offset != LU.Offsets.back())
+ LU.Offsets.push_back(Offset);
+
+ LU.MinOffset = Offset;
+ LU.MaxOffset = Offset;
+ return std::make_pair(LUIdx, Offset);
+}
+
+/// DeleteUse - Delete the given use from the Uses list.
+void LSRInstance::DeleteUse(LSRUse &LU) {
+ if (&LU != &Uses.back())
+ std::swap(LU, Uses.back());
+ Uses.pop_back();
+}
+
+/// FindUseWithFormula - Look for a use distinct from OrigLU which is has
+/// a formula that has the same registers as the given formula.
+LSRUse *
+LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
+ const LSRUse &OrigLU) {
+ // Search all uses for the formula. This could be more clever.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ // Check whether this use is close enough to OrigLU, to see whether it's
+ // worthwhile looking through its formulae.
+ // Ignore ICmpZero uses because they may contain formulae generated by
+ // GenerateICmpZeroScales, in which case adding fixup offsets may
+ // be invalid.
+ if (&LU != &OrigLU &&
+ LU.Kind != LSRUse::ICmpZero &&
+ LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
+ LU.WidestFixupType == OrigLU.WidestFixupType &&
+ LU.HasFormulaWithSameRegs(OrigF)) {
+ // Scan through this use's formulae.
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ // Check to see if this formula has the same registers and symbols
+ // as OrigF.
+ if (F.BaseRegs == OrigF.BaseRegs &&
+ F.ScaledReg == OrigF.ScaledReg &&
+ F.AM.BaseGV == OrigF.AM.BaseGV &&
+ F.AM.Scale == OrigF.AM.Scale) {
+ if (F.AM.BaseOffs == 0)
+ return &LU;
+ // This is the formula where all the registers and symbols matched;
+ // there aren't going to be any others. Since we declined it, we
+ // can skip the rest of the formulae and procede to the next LSRUse.
+ break;
+ }
+ }
+ }
+ }
+
+ // Nothing looked good.
+ return 0;
+}
+
+void LSRInstance::CollectInterestingTypesAndFactors() {
+ SmallSetVector<const SCEV *, 4> Strides;
+
+ // Collect interesting types and strides.
+ SmallVector<const SCEV *, 4> Worklist;
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ const SCEV *Expr = IU.getExpr(*UI);
+
+ // Collect interesting types.
+ Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
+
+ // Add strides for mentioned loops.
+ Worklist.push_back(Expr);
+ do {
+ const SCEV *S = Worklist.pop_back_val();
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ Strides.insert(AR->getStepRecurrence(SE));
+ Worklist.push_back(AR->getStart());
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ Worklist.append(Add->op_begin(), Add->op_end());
+ }
+ } while (!Worklist.empty());
+ }
+
+ // Compute interesting factors from the set of interesting strides.
+ for (SmallSetVector<const SCEV *, 4>::const_iterator
+ I = Strides.begin(), E = Strides.end(); I != E; ++I)
+ for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
+ llvm::next(I); NewStrideIter != E; ++NewStrideIter) {
+ const SCEV *OldStride = *I;
+ const SCEV *NewStride = *NewStrideIter;
+
+ if (SE.getTypeSizeInBits(OldStride->getType()) !=
+ SE.getTypeSizeInBits(NewStride->getType())) {
+ if (SE.getTypeSizeInBits(OldStride->getType()) >
+ SE.getTypeSizeInBits(NewStride->getType()))
+ NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
+ else
+ OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
+ }
+ if (const SCEVConstant *Factor =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
+ SE, true))) {
+ if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ } else if (const SCEVConstant *Factor =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
+ NewStride,
+ SE, true))) {
+ if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ }
+ }
+
+ // If all uses use the same type, don't bother looking for truncation-based
+ // reuse.
+ if (Types.size() == 1)
+ Types.clear();
+
+ DEBUG(print_factors_and_types(dbgs()));
+}
+
+void LSRInstance::CollectFixupsAndInitialFormulae() {
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ // Record the uses.
+ LSRFixup &LF = getNewFixup();
+ LF.UserInst = UI->getUser();
+ LF.OperandValToReplace = UI->getOperandValToReplace();
+ LF.PostIncLoops = UI->getPostIncLoops();
+
+ LSRUse::KindType Kind = LSRUse::Basic;
+ const Type *AccessTy = 0;
+ if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
+ Kind = LSRUse::Address;
+ AccessTy = getAccessType(LF.UserInst);
+ }
+
+ const SCEV *S = IU.getExpr(*UI);
+
+ // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
+ // (N - i == 0), and this allows (N - i) to be the expression that we work
+ // with rather than just N or i, so we can consider the register
+ // requirements for both N and i at the same time. Limiting this code to
+ // equality icmps is not a problem because all interesting loops use
+ // equality icmps, thanks to IndVarSimplify.
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
+ if (CI->isEquality()) {
+ // Swap the operands if needed to put the OperandValToReplace on the
+ // left, for consistency.
+ Value *NV = CI->getOperand(1);
+ if (NV == LF.OperandValToReplace) {
+ CI->setOperand(1, CI->getOperand(0));
+ CI->setOperand(0, NV);
+ NV = CI->getOperand(1);
+ Changed = true;
+ }
+
+ // x == y --> x - y == 0
+ const SCEV *N = SE.getSCEV(NV);
+ if (N->isLoopInvariant(L)) {
+ Kind = LSRUse::ICmpZero;
+ S = SE.getMinusSCEV(N, S);
+ }
+
+ // -1 and the negations of all interesting strides (except the negation
+ // of -1) are now also interesting.
+ for (size_t i = 0, e = Factors.size(); i != e; ++i)
+ if (Factors[i] != -1)
+ Factors.insert(-(uint64_t)Factors[i]);
+ Factors.insert(-1);
+ }
+
+ // Set up the initial formula for this use.
+ std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
+ LF.LUIdx = P.first;
+ LF.Offset = P.second;
+ LSRUse &LU = Uses[LF.LUIdx];
+ LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
+
+ // If this is the first use of this LSRUse, give it a formula.
+ if (LU.Formulae.empty()) {
+ InsertInitialFormula(S, LU, LF.LUIdx);
+ CountRegisters(LU.Formulae.back(), LF.LUIdx);
+ }
+ }
+
+ DEBUG(print_fixups(dbgs()));
+}
+
+/// InsertInitialFormula - Insert a formula for the given expression into
+/// the given use, separating out loop-variant portions from loop-invariant
+/// and loop-computable portions.
+void
+LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
+ Formula F;
+ F.InitialMatch(S, L, SE, DT);
+ bool Inserted = InsertFormula(LU, LUIdx, F);
+ assert(Inserted && "Initial formula already exists!"); (void)Inserted;
+}
+
+/// InsertSupplementalFormula - Insert a simple single-register formula for
+/// the given expression into the given use.
+void
+LSRInstance::InsertSupplementalFormula(const SCEV *S,
+ LSRUse &LU, size_t LUIdx) {
+ Formula F;
+ F.BaseRegs.push_back(S);
+ F.AM.HasBaseReg = true;
+ bool Inserted = InsertFormula(LU, LUIdx, F);
+ assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
+}
+
+/// CountRegisters - Note which registers are used by the given formula,
+/// updating RegUses.
+void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
+ if (F.ScaledReg)
+ RegUses.CountRegister(F.ScaledReg, LUIdx);
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I)
+ RegUses.CountRegister(*I, LUIdx);
+}
+
+/// InsertFormula - If the given formula has not yet been inserted, add it to
+/// the list, and return true. Return false otherwise.
+bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
+ if (!LU.InsertFormula(F))
+ return false;
+
+ CountRegisters(F, LUIdx);
+ return true;
+}
+
+/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
+/// loop-invariant values which we're tracking. These other uses will pin these
+/// values in registers, making them less profitable for elimination.
+/// TODO: This currently misses non-constant addrec step registers.
+/// TODO: Should this give more weight to users inside the loop?
+void
+LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
+ SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
+ SmallPtrSet<const SCEV *, 8> Inserted;
+
+ while (!Worklist.empty()) {
+ const SCEV *S = Worklist.pop_back_val();
+
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
+ Worklist.append(N->op_begin(), N->op_end());
+ else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ Worklist.push_back(C->getOperand());
+ else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ Worklist.push_back(D->getLHS());
+ Worklist.push_back(D->getRHS());
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (!Inserted.insert(U)) continue;
+ const Value *V = U->getValue();
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ // Look for instructions defined outside the loop.
+ if (L->contains(Inst)) continue;
+ } else if (isa<UndefValue>(V))
+ // Undef doesn't have a live range, so it doesn't matter.
+ continue;
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ const Instruction *UserInst = dyn_cast<Instruction>(*UI);
+ // Ignore non-instructions.
+ if (!UserInst)
+ continue;
+ // Ignore instructions in other functions (as can happen with
+ // Constants).
+ if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
+ continue;
+ // Ignore instructions not dominated by the loop.
+ const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
+ UserInst->getParent() :
+ cast<PHINode>(UserInst)->getIncomingBlock(
+ PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
+ if (!DT.dominates(L->getHeader(), UseBB))
+ continue;
+ // Ignore uses which are part of other SCEV expressions, to avoid
+ // analyzing them multiple times.
+ if (SE.isSCEVable(UserInst->getType())) {
+ const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
+ // If the user is a no-op, look through to its uses.
+ if (!isa<SCEVUnknown>(UserS))
+ continue;
+ if (UserS == U) {
+ Worklist.push_back(
+ SE.getUnknown(const_cast<Instruction *>(UserInst)));
+ continue;
+ }
+ }
+ // Ignore icmp instructions which are already being analyzed.
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
+ unsigned OtherIdx = !UI.getOperandNo();
+ Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
+ if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L))
+ continue;
+ }
+
+ LSRFixup &LF = getNewFixup();
+ LF.UserInst = const_cast<Instruction *>(UserInst);
+ LF.OperandValToReplace = UI.getUse();
+ std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
+ LF.LUIdx = P.first;
+ LF.Offset = P.second;
+ LSRUse &LU = Uses[LF.LUIdx];
+ LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
+ InsertSupplementalFormula(U, LU, LF.LUIdx);
+ CountRegisters(LU.Formulae.back(), Uses.size() - 1);
+ break;
+ }
+ }
+ }
+}
+
+/// CollectSubexprs - Split S into subexpressions which can be pulled out into
+/// separate registers. If C is non-null, multiply each subexpression by C.
+static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
+ SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L,
+ ScalarEvolution &SE) {
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ // Break out add operands.
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ CollectSubexprs(*I, C, Ops, L, SE);
+ return;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // Split a non-zero base out of an addrec.
+ if (!AR->getStart()->isZero()) {
+ CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
+ AR->getStepRecurrence(SE),
+ AR->getLoop()),
+ C, Ops, L, SE);
+ CollectSubexprs(AR->getStart(), C, Ops, L, SE);
+ return;
+ }
+ } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ // Break (C * (a + b + c)) into C*a + C*b + C*c.
+ if (Mul->getNumOperands() == 2)
+ if (const SCEVConstant *Op0 =
+ dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ CollectSubexprs(Mul->getOperand(1),
+ C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
+ Ops, L, SE);
+ return;
+ }
+ }
+
+ // Otherwise use the value itself, optionally with a scale applied.
+ Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+}
+
+/// GenerateReassociations - Split out subexpressions from adds and the bases of
+/// addrecs.
+void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
+ Formula Base,
+ unsigned Depth) {
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3) return;
+
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *BaseReg = Base.BaseRegs[i];
+
+ SmallVector<const SCEV *, 8> AddOps;
+ CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+
+ if (AddOps.size() == 1) continue;
+
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
+ JE = AddOps.end(); J != JE; ++J) {
+
+ // Loop-variant "unknown" values are uninteresting; we won't be able to
+ // do anything meaningful with them.
+ if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L))
+ continue;
+
+ // Don't pull a constant into a register if the constant could be folded
+ // into an immediate field.
+ if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
+ Base.getNumRegs() > 1,
+ LU.Kind, LU.AccessTy, TLI, SE))
+ continue;
+
+ // Collect all operands except *J.
+ SmallVector<const SCEV *, 8> InnerAddOps
+ (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ InnerAddOps.append
+ (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+
+ // Don't leave just a constant behind in a register if the constant could
+ // be folded into an immediate field.
+ if (InnerAddOps.size() == 1 &&
+ isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
+ Base.getNumRegs() > 1,
+ LU.Kind, LU.AccessTy, TLI, SE))
+ continue;
+
+ const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
+ if (InnerSum->isZero())
+ continue;
+ Formula F = Base;
+ F.BaseRegs[i] = InnerSum;
+ F.BaseRegs.push_back(*J);
+ if (InsertFormula(LU, LUIdx, F))
+ // If that formula hadn't been seen before, recurse to find more like
+ // it.
+ GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
+ }
+ }
+}
+
+/// GenerateCombinations - Generate a formula consisting of all of the
+/// loop-dominating registers added into a single register.
+void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // This method is only interesting on a plurality of registers.
+ if (Base.BaseRegs.size() <= 1) return;
+
+ Formula F = Base;
+ F.BaseRegs.clear();
+ SmallVector<const SCEV *, 4> Ops;
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
+ const SCEV *BaseReg = *I;
+ if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
+ !BaseReg->hasComputableLoopEvolution(L))
+ Ops.push_back(BaseReg);
+ else
+ F.BaseRegs.push_back(BaseReg);
+ }
+ if (Ops.size() > 1) {
+ const SCEV *Sum = SE.getAddExpr(Ops);
+ // TODO: If Sum is zero, it probably means ScalarEvolution missed an
+ // opportunity to fold something. For now, just ignore such cases
+ // rather than proceed with zero in a register.
+ if (!Sum->isZero()) {
+ F.BaseRegs.push_back(Sum);
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+}
+
+/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
+void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // We can't add a symbolic offset if the address already contains one.
+ if (Base.AM.BaseGV) return;
+
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *G = Base.BaseRegs[i];
+ GlobalValue *GV = ExtractSymbol(G, SE);
+ if (G->isZero() || !GV)
+ continue;
+ Formula F = Base;
+ F.AM.BaseGV = GV;
+ if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI))
+ continue;
+ F.BaseRegs[i] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+}
+
+/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
+void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // TODO: For now, just add the min and max offset, because it usually isn't
+ // worthwhile looking at everything inbetween.
+ SmallVector<int64_t, 2> Worklist;
+ Worklist.push_back(LU.MinOffset);
+ if (LU.MaxOffset != LU.MinOffset)
+ Worklist.push_back(LU.MaxOffset);
+
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *G = Base.BaseRegs[i];
+
+ for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ Formula F = Base;
+ F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
+ if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
+ LU.Kind, LU.AccessTy, TLI)) {
+ // Add the offset to the base register.
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
+ // If it cancelled out, drop the base register, otherwise update it.
+ if (NewG->isZero()) {
+ std::swap(F.BaseRegs[i], F.BaseRegs.back());
+ F.BaseRegs.pop_back();
+ } else
+ F.BaseRegs[i] = NewG;
+
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+
+ int64_t Imm = ExtractImmediate(G, SE);
+ if (G->isZero() || Imm == 0)
+ continue;
+ Formula F = Base;
+ F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
+ if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI))
+ continue;
+ F.BaseRegs[i] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+}
+
+/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
+/// the comparison. For example, x == y -> x*c == y*c.
+void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ if (LU.Kind != LSRUse::ICmpZero) return;
+
+ // Determine the integer type for the base formula.
+ const Type *IntTy = Base.getType();
+ if (!IntTy) return;
+ if (SE.getTypeSizeInBits(IntTy) > 64) return;
+
+ // Don't do this if there is more than one offset.
+ if (LU.MinOffset != LU.MaxOffset) return;
+
+ assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");
+
+ // Check each interesting stride.
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ int64_t Factor = *I;
+
+ // Check that the multiplication doesn't overflow.
+ if (Base.AM.BaseOffs == INT64_MIN && Factor == -1)
+ continue;
+ int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
+ if (NewBaseOffs / Factor != Base.AM.BaseOffs)
+ continue;
+
+ // Check that multiplying with the use offset doesn't overflow.
+ int64_t Offset = LU.MinOffset;
+ if (Offset == INT64_MIN && Factor == -1)
+ continue;
+ Offset = (uint64_t)Offset * Factor;
+ if (Offset / Factor != LU.MinOffset)
+ continue;
+
+ Formula F = Base;
+ F.AM.BaseOffs = NewBaseOffs;
+
+ // Check that this scale is legal.
+ if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
+ continue;
+
+ // Compensate for the use having MinOffset built into it.
+ F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset;
+
+ const SCEV *FactorS = SE.getConstant(IntTy, Factor);
+
+ // Check that multiplying with each base register doesn't overflow.
+ for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
+ F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
+ if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
+ goto next;
+ }
+
+ // Check that multiplying with the scaled register doesn't overflow.
+ if (F.ScaledReg) {
+ F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
+ if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
+ continue;
+ }
+
+ // If we make it here and it's legal, add it.
+ (void)InsertFormula(LU, LUIdx, F);
+ next:;
+ }
+}
+
+/// GenerateScales - Generate stride factor reuse formulae by making use of
+/// scaled-offset address modes, for example.
+void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
+ // Determine the integer type for the base formula.
+ const Type *IntTy = Base.getType();
+ if (!IntTy) return;
+
+ // If this Formula already has a scaled register, we can't add another one.
+ if (Base.AM.Scale != 0) return;
+
+ // Check each interesting stride.
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ int64_t Factor = *I;
+
+ Base.AM.Scale = Factor;
+ Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
+ // Check whether this scale is going to be legal.
+ if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI)) {
+ // As a special-case, handle special out-of-loop Basic users specially.
+ // TODO: Reconsider this special case.
+ if (LU.Kind == LSRUse::Basic &&
+ isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
+ LSRUse::Special, LU.AccessTy, TLI) &&
+ LU.AllFixupsOutsideLoop)
+ LU.Kind = LSRUse::Special;
+ else
+ continue;
+ }
+ // For an ICmpZero, negating a solitary base register won't lead to
+ // new solutions.
+ if (LU.Kind == LSRUse::ICmpZero &&
+ !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV)
+ continue;
+ // For each addrec base reg, apply the scale, if possible.
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ if (const SCEVAddRecExpr *AR =
+ dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
+ const SCEV *FactorS = SE.getConstant(IntTy, Factor);
+ if (FactorS->isZero())
+ continue;
+ // Divide out the factor, ignoring high bits, since we'll be
+ // scaling the value back up in the end.
+ if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
+ // TODO: This could be optimized to avoid all the copying.
+ Formula F = Base;
+ F.ScaledReg = Quotient;
+ F.DeleteBaseReg(F.BaseRegs[i]);
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+ }
+}
+
+/// GenerateTruncates - Generate reuse formulae from different IV types.
+void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
+ // This requires TargetLowering to tell us which truncates are free.
+ if (!TLI) return;
+
+ // Don't bother truncating symbolic values.
+ if (Base.AM.BaseGV) return;
+
+ // Determine the integer type for the base formula.
+ const Type *DstTy = Base.getType();
+ if (!DstTy) return;
+ DstTy = SE.getEffectiveSCEVType(DstTy);
+
+ for (SmallSetVector<const Type *, 4>::const_iterator
+ I = Types.begin(), E = Types.end(); I != E; ++I) {
+ const Type *SrcTy = *I;
+ if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
+ Formula F = Base;
+
+ if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
+ for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J)
+ *J = SE.getAnyExtendExpr(*J, SrcTy);
+
+ // TODO: This assumes we've done basic processing on all uses and
+ // have an idea what the register usage is.
+ if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
+ continue;
+
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+}
+
+namespace {
+
+/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
+/// defer modifications so that the search phase doesn't have to worry about
+/// the data structures moving underneath it.
+struct WorkItem {
+ size_t LUIdx;
+ int64_t Imm;
+ const SCEV *OrigReg;
+
+ WorkItem(size_t LI, int64_t I, const SCEV *R)
+ : LUIdx(LI), Imm(I), OrigReg(R) {}
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+void WorkItem::print(raw_ostream &OS) const {
+ OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
+ << " , add offset " << Imm;
+}
+
+void WorkItem::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
+/// distance apart and try to form reuse opportunities between them.
+void LSRInstance::GenerateCrossUseConstantOffsets() {
+ // Group the registers by their value without any added constant offset.
+ typedef std::map<int64_t, const SCEV *> ImmMapTy;
+ typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
+ RegMapTy Map;
+ DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
+ SmallVector<const SCEV *, 8> Sequence;
+ for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
+ I != E; ++I) {
+ const SCEV *Reg = *I;
+ int64_t Imm = ExtractImmediate(Reg, SE);
+ std::pair<RegMapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Reg, ImmMapTy()));
+ if (Pair.second)
+ Sequence.push_back(Reg);
+ Pair.first->second.insert(std::make_pair(Imm, *I));
+ UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
+ }
+
+ // Now examine each set of registers with the same base value. Build up
+ // a list of work to do and do the work in a separate step so that we're
+ // not adding formulae and register counts while we're searching.
+ SmallVector<WorkItem, 32> WorkItems;
+ SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
+ E = Sequence.end(); I != E; ++I) {
+ const SCEV *Reg = *I;
+ const ImmMapTy &Imms = Map.find(Reg)->second;
+
+ // It's not worthwhile looking for reuse if there's only one offset.
+ if (Imms.size() == 1)
+ continue;
+
+ DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
+ for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
+ J != JE; ++J)
+ dbgs() << ' ' << J->first;
+ dbgs() << '\n');
+
+ // Examine each offset.
+ for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
+ J != JE; ++J) {
+ const SCEV *OrigReg = J->second;
+
+ int64_t JImm = J->first;
+ const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
+
+ if (!isa<SCEVConstant>(OrigReg) &&
+ UsedByIndicesMap[Reg].count() == 1) {
+ DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
+ continue;
+ }
+
+ // Conservatively examine offsets between this orig reg a few selected
+ // other orig regs.
+ ImmMapTy::const_iterator OtherImms[] = {
+ Imms.begin(), prior(Imms.end()),
+ Imms.upper_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
+ };
+ for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
+ ImmMapTy::const_iterator M = OtherImms[i];
+ if (M == J || M == JE) continue;
+
+ // Compute the difference between the two.
+ int64_t Imm = (uint64_t)JImm - M->first;
+ for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
+ LUIdx = UsedByIndices.find_next(LUIdx))
+ // Make a memo of this use, offset, and register tuple.
+ if (UniqueItems.insert(std::make_pair(LUIdx, Imm)))
+ WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
+ }
+ }
+ }
+
+ Map.clear();
+ Sequence.clear();
+ UsedByIndicesMap.clear();
+ UniqueItems.clear();
+
+ // Now iterate through the worklist and add new formulae.
+ for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
+ E = WorkItems.end(); I != E; ++I) {
+ const WorkItem &WI = *I;
+ size_t LUIdx = WI.LUIdx;
+ LSRUse &LU = Uses[LUIdx];
+ int64_t Imm = WI.Imm;
+ const SCEV *OrigReg = WI.OrigReg;
+
+ const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
+ const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
+ unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
+
+ // TODO: Use a more targeted data structure.
+ for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
+ const Formula &F = LU.Formulae[L];
+ // Use the immediate in the scaled register.
+ if (F.ScaledReg == OrigReg) {
+ int64_t Offs = (uint64_t)F.AM.BaseOffs +
+ Imm * (uint64_t)F.AM.Scale;
+ // Don't create 50 + reg(-50).
+ if (F.referencesReg(SE.getSCEV(
+ ConstantInt::get(IntTy, -(uint64_t)Offs))))
+ continue;
+ Formula NewF = F;
+ NewF.AM.BaseOffs = Offs;
+ if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI))
+ continue;
+ NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
+
+ // If the new scale is a constant in a register, and adding the constant
+ // value to the immediate would produce a value closer to zero than the
+ // immediate itself, then the formula isn't worthwhile.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
+ if (C->getValue()->getValue().isNegative() !=
+ (NewF.AM.BaseOffs < 0) &&
+ (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
+ .ule(abs64(NewF.AM.BaseOffs)))
+ continue;
+
+ // OK, looks good.
+ (void)InsertFormula(LU, LUIdx, NewF);
+ } else {
+ // Use the immediate in a base register.
+ for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
+ const SCEV *BaseReg = F.BaseRegs[N];
+ if (BaseReg != OrigReg)
+ continue;
+ Formula NewF = F;
+ NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
+ if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI))
+ continue;
+ NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
+
+ // If the new formula has a constant in a register, and adding the
+ // constant value to the immediate would produce a value closer to
+ // zero than the immediate itself, then the formula isn't worthwhile.
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
+ J != JE; ++J)
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
+ if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt(
+ abs64(NewF.AM.BaseOffs)) &&
+ (C->getValue()->getValue() +
+ NewF.AM.BaseOffs).countTrailingZeros() >=
+ CountTrailingZeros_64(NewF.AM.BaseOffs))
+ goto skip_formula;
+
+ // Ok, looks good.
+ (void)InsertFormula(LU, LUIdx, NewF);
+ break;
+ skip_formula:;
+ }
+ }
+ }
+ }
+}
+
+/// GenerateAllReuseFormulae - Generate formulae for each use.
+void
+LSRInstance::GenerateAllReuseFormulae() {
+ // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
+ // queries are more precise.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
+ }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateScales(LU, LUIdx, LU.Formulae[i]);
+ }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
+ }
+
+ GenerateCrossUseConstantOffsets();
+
+ DEBUG(dbgs() << "\n"
+ "After generating reuse formulae:\n";
+ print_uses(dbgs()));
+}
+
+/// If their are multiple formulae with the same set of registers used
+/// by other uses, pick the best one and delete the others.
+void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
+#ifndef NDEBUG
+ bool ChangedFormulae = false;
+#endif
+
+ // Collect the best formula for each unique set of shared registers. This
+ // is reset for each use.
+ typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo>
+ BestFormulaeTy;
+ BestFormulaeTy BestFormulae;
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ FormulaSorter Sorter(L, LU, SE, DT);
+ DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
+
+ bool Any = false;
+ for (size_t FIdx = 0, NumForms = LU.Formulae.size();
+ FIdx != NumForms; ++FIdx) {
+ Formula &F = LU.Formulae[FIdx];
+
+ SmallVector<const SCEV *, 2> Key;
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+ Key.push_back(Reg);
+ }
+ if (F.ScaledReg &&
+ RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+ Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for
+ // uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ std::pair<BestFormulaeTy::const_iterator, bool> P =
+ BestFormulae.insert(std::make_pair(Key, FIdx));
+ if (!P.second) {
+ Formula &Best = LU.Formulae[P.first->second];
+ if (Sorter.operator()(F, Best))
+ std::swap(F, Best);
+ DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
+ dbgs() << "\n"
+ " in favor of formula "; Best.print(dbgs());
+ dbgs() << '\n');
+#ifndef NDEBUG
+ ChangedFormulae = true;
+#endif
+ LU.DeleteFormula(F);
+ --FIdx;
+ --NumForms;
+ Any = true;
+ continue;
+ }
+ }
+
+ // Now that we've filtered out some formulae, recompute the Regs set.
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
+
+ // Reset this to prepare for the next use.
+ BestFormulae.clear();
+ }
+
+ DEBUG(if (ChangedFormulae) {
+ dbgs() << "\n"
+ "After filtering out undesirable candidates:\n";
+ print_uses(dbgs());
+ });
+}
+
+// This is a rough guess that seems to work fairly well.
+static const size_t ComplexityLimit = UINT16_MAX;
+
+/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
+/// solutions the solver might have to consider. It almost never considers
+/// this many solutions because it prune the search space, but the pruning
+/// isn't always sufficient.
+size_t LSRInstance::EstimateSearchSpaceComplexity() const {
+ uint32_t Power = 1;
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ size_t FSize = I->Formulae.size();
+ if (FSize >= ComplexityLimit) {
+ Power = ComplexityLimit;
+ break;
+ }
+ Power *= FSize;
+ if (Power >= ComplexityLimit)
+ break;
+ }
+ return Power;
+}
+
+/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
+/// of the registers of another formula, it won't help reduce register
+/// pressure (though it may not necessarily hurt register pressure); remove
+/// it to simplify the system.
+void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
+ "which use a superset of registers used by other "
+ "formulae.\n");
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ bool Any = false;
+ for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+ Formula &F = LU.Formulae[i];
+ // Look for a formula with a constant or GV in a register. If the use
+ // also has a formula with that same value in an immediate field,
+ // delete the one that uses a register.
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
+ Formula NewF = F;
+ NewF.AM.BaseOffs += C->getValue()->getSExtValue();
+ NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
+ (I - F.BaseRegs.begin()));
+ if (LU.HasFormulaWithSameRegs(NewF)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
+ LU.DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ break;
+ }
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
+ if (!F.AM.BaseGV) {
+ Formula NewF = F;
+ NewF.AM.BaseGV = GV;
+ NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
+ (I - F.BaseRegs.begin()));
+ if (LU.HasFormulaWithSameRegs(NewF)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
+ LU.DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
+ }
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
+/// for expressions like A, A+1, A+2, etc., allocate a single register for
+/// them.
+void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by assuming that uses "
+ "separated by a constant offset will use the same "
+ "registers.\n");
+
+ // This is especially useful for unrolled loops.
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) {
+ if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) {
+ if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs,
+ /*HasBaseReg=*/false,
+ LU.Kind, LU.AccessTy)) {
+ DEBUG(dbgs() << " Deleting use "; LU.print(dbgs());
+ dbgs() << '\n');
+
+ LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+
+ // Delete formulae from the new use which are no longer legal.
+ bool Any = false;
+ for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
+ Formula &F = LUThatHas->Formulae[i];
+ if (!isLegalUse(F.AM,
+ LUThatHas->MinOffset, LUThatHas->MaxOffset,
+ LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
+ LUThatHas->DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ }
+ }
+ if (Any)
+ LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+
+ // Update the relocs to reference the new use.
+ for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ LSRFixup &Fixup = *I;
+ if (Fixup.LUIdx == LUIdx) {
+ Fixup.LUIdx = LUThatHas - &Uses.front();
+ Fixup.Offset += F.AM.BaseOffs;
+ DEBUG(dbgs() << "New fixup has offset "
+ << Fixup.Offset << '\n');
+ }
+ if (Fixup.LUIdx == NumUses-1)
+ Fixup.LUIdx = LUIdx;
+ }
+
+ // Delete the old use.
+ DeleteUse(LU);
+ --LUIdx;
+ --NumUses;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
+/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
+/// we've done more filtering, as it may be able to find more formulae to
+/// eliminate.
+void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
+ "undesirable dedicated registers.\n");
+
+ FilterOutUndesirableDedicatedRegisters();
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
+/// to be profitable, and then in any use which has any reference to that
+/// register, delete all formulae which do not reference that register.
+void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
+ // With all other options exhausted, loop until the system is simple
+ // enough to handle.
+ SmallPtrSet<const SCEV *, 4> Taken;
+ while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ // Ok, we have too many of formulae on our hands to conveniently handle.
+ // Use a rough heuristic to thin out the list.
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ // Pick the register which is used by the most LSRUses, which is likely
+ // to be a good reuse register candidate.
+ const SCEV *Best = 0;
+ unsigned BestNum = 0;
+ for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
+ I != E; ++I) {
+ const SCEV *Reg = *I;
+ if (Taken.count(Reg))
+ continue;
+ if (!Best)
+ Best = Reg;
+ else {
+ unsigned Count = RegUses.getUsedByIndices(Reg).count();
+ if (Count > BestNum) {
+ Best = Reg;
+ BestNum = Count;
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
+ << " will yield profitable reuse.\n");
+ Taken.insert(Best);
+
+ // In any use with formulae which references this register, delete formulae
+ // which don't reference it.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ if (!LU.Regs.count(Best)) continue;
+
+ bool Any = false;
+ for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+ Formula &F = LU.Formulae[i];
+ if (!F.referencesReg(Best)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
+ LU.DeleteFormula(F);
+ --e;
+ --i;
+ Any = true;
+ assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
+ continue;
+ }
+ }
+
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
+ }
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
+/// formulae to choose from, use some rough heuristics to prune down the number
+/// of formulae. This keeps the main solver from taking an extraordinary amount
+/// of time in some worst-case scenarios.
+void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+ NarrowSearchSpaceByDetectingSupersets();
+ NarrowSearchSpaceByCollapsingUnrolledCode();
+ NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ NarrowSearchSpaceByPickingWinnerRegs();
+}
+
+/// SolveRecurse - This is the recursive solver.
+void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
+ Cost &SolutionCost,
+ SmallVectorImpl<const Formula *> &Workspace,
+ const Cost &CurCost,
+ const SmallPtrSet<const SCEV *, 16> &CurRegs,
+ DenseSet<const SCEV *> &VisitedRegs) const {
+ // Some ideas:
+ // - prune more:
+ // - use more aggressive filtering
+ // - sort the formula so that the most profitable solutions are found first
+ // - sort the uses too
+ // - search faster:
+ // - don't compute a cost, and then compare. compare while computing a cost
+ // and bail early.
+ // - track register sets with SmallBitVector
+
+ const LSRUse &LU = Uses[Workspace.size()];
+
+ // If this use references any register that's already a part of the
+ // in-progress solution, consider it a requirement that a formula must
+ // reference that register in order to be considered. This prunes out
+ // unprofitable searching.
+ SmallSetVector<const SCEV *, 4> ReqRegs;
+ for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(),
+ E = CurRegs.end(); I != E; ++I)
+ if (LU.Regs.count(*I))
+ ReqRegs.insert(*I);
+
+ bool AnySatisfiedReqRegs = false;
+ SmallPtrSet<const SCEV *, 16> NewRegs;
+ Cost NewCost;
+retry:
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+
+ // Ignore formulae which do not use any of the required registers.
+ for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
+ JE = ReqRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if ((!F.ScaledReg || F.ScaledReg != Reg) &&
+ std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
+ F.BaseRegs.end())
+ goto skip;
+ }
+ AnySatisfiedReqRegs = true;
+
+ // Evaluate the cost of the current formula. If it's already worse than
+ // the current best, prune the search at that point.
+ NewCost = CurCost;
+ NewRegs = CurRegs;
+ NewCost.RateFormula(F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT);
+ if (NewCost < SolutionCost) {
+ Workspace.push_back(&F);
+ if (Workspace.size() != Uses.size()) {
+ SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
+ NewRegs, VisitedRegs);
+ if (F.getNumRegs() == 1 && Workspace.size() == 1)
+ VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
+ } else {
+ DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
+ dbgs() << ". Regs:";
+ for (SmallPtrSet<const SCEV *, 16>::const_iterator
+ I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
+ dbgs() << ' ' << **I;
+ dbgs() << '\n');
+
+ SolutionCost = NewCost;
+ Solution = Workspace;
+ }
+ Workspace.pop_back();
+ }
+ skip:;
+ }
+
+ // If none of the formulae had all of the required registers, relax the
+ // constraint so that we don't exclude all formulae.
+ if (!AnySatisfiedReqRegs) {
+ assert(!ReqRegs.empty() && "Solver failed even without required registers");
+ ReqRegs.clear();
+ goto retry;
+ }
+}
+
+/// Solve - Choose one formula from each use. Return the results in the given
+/// Solution vector.
+void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
+ SmallVector<const Formula *, 8> Workspace;
+ Cost SolutionCost;
+ SolutionCost.Loose();
+ Cost CurCost;
+ SmallPtrSet<const SCEV *, 16> CurRegs;
+ DenseSet<const SCEV *> VisitedRegs;
+ Workspace.reserve(Uses.size());
+
+ // SolveRecurse does all the work.
+ SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
+ CurRegs, VisitedRegs);
+
+ // Ok, we've now made all our decisions.
+ DEBUG(dbgs() << "\n"
+ "The chosen solution requires "; SolutionCost.print(dbgs());
+ dbgs() << ":\n";
+ for (size_t i = 0, e = Uses.size(); i != e; ++i) {
+ dbgs() << " ";
+ Uses[i].print(dbgs());
+ dbgs() << "\n"
+ " ";
+ Solution[i]->print(dbgs());
+ dbgs() << '\n';
+ });
+
+ assert(Solution.size() == Uses.size() && "Malformed solution!");
+}
+
+/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
+/// the dominator tree far as we can go while still being dominated by the
+/// input positions. This helps canonicalize the insert position, which
+/// encourages sharing.
+BasicBlock::iterator
+LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
+ const SmallVectorImpl<Instruction *> &Inputs)
+ const {
+ for (;;) {
+ const Loop *IPLoop = LI.getLoopFor(IP->getParent());
+ unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
+
+ BasicBlock *IDom;
+ for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
+ if (!Rung) return IP;
+ Rung = Rung->getIDom();
+ if (!Rung) return IP;
+ IDom = Rung->getBlock();
+
+ // Don't climb into a loop though.
+ const Loop *IDomLoop = LI.getLoopFor(IDom);
+ unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
+ if (IDomDepth <= IPLoopDepth &&
+ (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
+ break;
+ }
+
+ bool AllDominate = true;
+ Instruction *BetterPos = 0;
+ Instruction *Tentative = IDom->getTerminator();
+ for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
+ E = Inputs.end(); I != E; ++I) {
+ Instruction *Inst = *I;
+ if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
+ AllDominate = false;
+ break;
+ }
+ // Attempt to find an insert position in the middle of the block,
+ // instead of at the end, so that it can be used for other expansions.
+ if (IDom == Inst->getParent() &&
+ (!BetterPos || DT.dominates(BetterPos, Inst)))
+ BetterPos = llvm::next(BasicBlock::iterator(Inst));
+ }
+ if (!AllDominate)
+ break;
+ if (BetterPos)
+ IP = BetterPos;
+ else
+ IP = Tentative;
+ }
+
+ return IP;
+}
+
+/// AdjustInsertPositionForExpand - Determine an input position which will be
+/// dominated by the operands and which will dominate the result.
+BasicBlock::iterator
+LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+ const LSRFixup &LF,
+ const LSRUse &LU) const {
+ // Collect some instructions which must be dominated by the
+ // expanding replacement. These must be dominated by any operands that
+ // will be required in the expansion.
+ SmallVector<Instruction *, 4> Inputs;
+ if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
+ Inputs.push_back(I);
+ if (LU.Kind == LSRUse::ICmpZero)
+ if (Instruction *I =
+ dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
+ Inputs.push_back(I);
+ if (LF.PostIncLoops.count(L)) {
+ if (LF.isUseFullyOutsideLoop(L))
+ Inputs.push_back(L->getLoopLatch()->getTerminator());
+ else
+ Inputs.push_back(IVIncInsertPos);
+ }
+ // The expansion must also be dominated by the increment positions of any
+ // loops it for which it is using post-inc mode.
+ for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(),
+ E = LF.PostIncLoops.end(); I != E; ++I) {
+ const Loop *PIL = *I;
+ if (PIL == L) continue;
+
+ // Be dominated by the loop exit.
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ PIL->getExitingBlocks(ExitingBlocks);
+ if (!ExitingBlocks.empty()) {
+ BasicBlock *BB = ExitingBlocks[0];
+ for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
+ BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
+ Inputs.push_back(BB->getTerminator());
+ }
+ }
+
+ // Then, climb up the immediate dominator tree as far as we can go while
+ // still being dominated by the input positions.
+ IP = HoistInsertPosition(IP, Inputs);
+
+ // Don't insert instructions before PHI nodes.
+ while (isa<PHINode>(IP)) ++IP;
+
+ // Ignore debug intrinsics.
+ while (isa<DbgInfoIntrinsic>(IP)) ++IP;
+
+ return IP;
+}
+
+/// Expand - Emit instructions for the leading candidate expression for this
+/// LSRUse (this is called "expanding").
+Value *LSRInstance::Expand(const LSRFixup &LF,
+ const Formula &F,
+ BasicBlock::iterator IP,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) const {
+ const LSRUse &LU = Uses[LF.LUIdx];
+
+ // Determine an input position which will be dominated by the operands and
+ // which will dominate the result.
+ IP = AdjustInsertPositionForExpand(IP, LF, LU);
+
+ // Inform the Rewriter if we have a post-increment use, so that it can
+ // perform an advantageous expansion.
+ Rewriter.setPostInc(LF.PostIncLoops);
+
+ // This is the type that the user actually needs.
+ const Type *OpTy = LF.OperandValToReplace->getType();
+ // This will be the type that we'll initially expand to.
+ const Type *Ty = F.getType();
+ if (!Ty)
+ // No type known; just expand directly to the ultimate type.
+ Ty = OpTy;
+ else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
+ // Expand directly to the ultimate type if it's the right size.
+ Ty = OpTy;
+ // This is the type to do integer arithmetic in.
+ const Type *IntTy = SE.getEffectiveSCEVType(Ty);
+
+ // Build up a list of operands to add together to form the full base.
+ SmallVector<const SCEV *, 8> Ops;
+
+ // Expand the BaseRegs portion.
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I) {
+ const SCEV *Reg = *I;
+ assert(!Reg->isZero() && "Zero allocated in a base register!");
+
+ // If we're expanding for a post-inc user, make the post-inc adjustment.
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+ Reg = TransformForPostIncUse(Denormalize, Reg,
+ LF.UserInst, LF.OperandValToReplace,
+ Loops, SE, DT);
+
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
+ }
+
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ if (!Ops.empty()) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+
+ // Expand the ScaledReg portion.
+ Value *ICmpScaledV = 0;
+ if (F.AM.Scale != 0) {
+ const SCEV *ScaledS = F.ScaledReg;
+
+ // If we're expanding for a post-inc user, make the post-inc adjustment.
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+ ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
+ LF.UserInst, LF.OperandValToReplace,
+ Loops, SE, DT);
+
+ if (LU.Kind == LSRUse::ICmpZero) {
+ // An interesting way of "folding" with an icmp is to use a negated
+ // scale, which we'll implement by inserting it into the other operand
+ // of the icmp.
+ assert(F.AM.Scale == -1 &&
+ "The only scale supported by ICmpZero uses is -1!");
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
+ } else {
+ // Otherwise just expand the scaled register and an explicit scale,
+ // which is expected to be matched as part of the address.
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
+ ScaledS = SE.getMulExpr(ScaledS,
+ SE.getConstant(ScaledS->getType(), F.AM.Scale));
+ Ops.push_back(ScaledS);
+
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+ }
+
+ // Expand the GV portion.
+ if (F.AM.BaseGV) {
+ Ops.push_back(SE.getUnknown(F.AM.BaseGV));
+
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+
+ // Expand the immediate portion.
+ int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset;
+ if (Offset != 0) {
+ if (LU.Kind == LSRUse::ICmpZero) {
+ // The other interesting way of "folding" with an ICmpZero is to use a
+ // negated immediate.
+ if (!ICmpScaledV)
+ ICmpScaledV = ConstantInt::get(IntTy, -Offset);
+ else {
+ Ops.push_back(SE.getUnknown(ICmpScaledV));
+ ICmpScaledV = ConstantInt::get(IntTy, Offset);
+ }
+ } else {
+ // Just add the immediate values. These again are expected to be matched
+ // as part of the address.
+ Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
+ }
+ }
+
+ // Emit instructions summing all the operands.
+ const SCEV *FullS = Ops.empty() ?
+ SE.getConstant(IntTy, 0) :
+ SE.getAddExpr(Ops);
+ Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
+
+ // We're done expanding now, so reset the rewriter.
+ Rewriter.clearPostInc();
+
+ // An ICmpZero Formula represents an ICmp which we're handling as a
+ // comparison against zero. Now that we've expanded an expression for that
+ // form, update the ICmp's other operand.
+ if (LU.Kind == LSRUse::ICmpZero) {
+ ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
+ DeadInsts.push_back(CI->getOperand(1));
+ assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
+ "a scale at the same time!");
+ if (F.AM.Scale == -1) {
+ if (ICmpScaledV->getType() != OpTy) {
+ Instruction *Cast =
+ CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
+ OpTy, false),
+ ICmpScaledV, OpTy, "tmp", CI);
+ ICmpScaledV = Cast;
+ }
+ CI->setOperand(1, ICmpScaledV);
+ } else {
+ assert(F.AM.Scale == 0 &&
+ "ICmp does not support folding a global value and "
+ "a scale at the same time!");
+ Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
+ -(uint64_t)Offset);
+ if (C->getType() != OpTy)
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ OpTy, false),
+ C, OpTy);
+
+ CI->setOperand(1, C);
+ }
+ }
+
+ return FullV;
+}
+
+/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
+/// of their operands effectively happens in their predecessor blocks, so the
+/// expression may need to be expanded in multiple places.
+void LSRInstance::RewriteForPHI(PHINode *PN,
+ const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const {
+ DenseMap<BasicBlock *, Value *> Inserted;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
+ BasicBlock *BB = PN->getIncomingBlock(i);
+
+ // If this is a critical edge, split the edge so that we do not insert
+ // the code on all predecessor/successor paths. We do this unless this
+ // is the canonical backedge for this loop, which complicates post-inc
+ // users.
+ if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
+ !isa<IndirectBrInst>(BB->getTerminator()) &&
+ (PN->getParent() != L->getHeader() || !L->contains(BB))) {
+ // Split the critical edge.
+ BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+
+ // If PN is outside of the loop and BB is in the loop, we want to
+ // move the block to be immediately before the PHI block, not
+ // immediately after BB.
+ if (L->contains(BB) && !L->contains(PN))
+ NewBB->moveBefore(PN->getParent());
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
+ BB = NewBB;
+ i = PN->getBasicBlockIndex(BB);
+ }
+
+ std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
+ Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
+ if (!Pair.second)
+ PN->setIncomingValue(i, Pair.first->second);
+ else {
+ Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
+
+ // If this is reuse-by-noop-cast, insert the noop cast.
+ const Type *OpTy = LF.OperandValToReplace->getType();
+ if (FullV->getType() != OpTy)
+ FullV =
+ CastInst::Create(CastInst::getCastOpcode(FullV, false,
+ OpTy, false),
+ FullV, LF.OperandValToReplace->getType(),
+ "tmp", BB->getTerminator());
+
+ PN->setIncomingValue(i, FullV);
+ Pair.first->second = FullV;
+ }
+ }
+}
+
+/// Rewrite - Emit instructions for the leading candidate expression for this
+/// LSRUse (this is called "expanding"), and update the UserInst to reference
+/// the newly expanded value.
+void LSRInstance::Rewrite(const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const {
+ // First, find an insertion point that dominates UserInst. For PHI nodes,
+ // find the nearest block which dominates all the relevant uses.
+ if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
+ RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
+ } else {
+ Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
+
+ // If this is reuse-by-noop-cast, insert the noop cast.
+ const Type *OpTy = LF.OperandValToReplace->getType();
+ if (FullV->getType() != OpTy) {
+ Instruction *Cast =
+ CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
+ FullV, OpTy, "tmp", LF.UserInst);
+ FullV = Cast;
+ }
+
+ // Update the user. ICmpZero is handled specially here (for now) because
+ // Expand may have updated one of the operands of the icmp already, and
+ // its new value may happen to be equal to LF.OperandValToReplace, in
+ // which case doing replaceUsesOfWith leads to replacing both operands
+ // with the same value. TODO: Reorganize this.
+ if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
+ LF.UserInst->setOperand(0, FullV);
+ else
+ LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
+ }
+
+ DeadInsts.push_back(LF.OperandValToReplace);
+}
+
+/// ImplementSolution - Rewrite all the fixup locations with new values,
+/// following the chosen solution.
+void
+LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
+ Pass *P) {
+ // Keep track of instructions we may have made dead, so that
+ // we can remove them after we are done working.
+ SmallVector<WeakVH, 16> DeadInsts;
+
+ SCEVExpander Rewriter(SE);
+ Rewriter.disableCanonicalMode();
+ Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
+
+ // Expand the new value definitions and update the users.
+ for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ const LSRFixup &Fixup = *I;
+
+ Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
+
+ Changed = true;
+ }
+
+ // Clean up after ourselves. This must be done before deleting any
+ // instructions.
+ Rewriter.clear();
+
+ Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
+}
+
+LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
+ : IU(P->getAnalysis<IVUsers>()),
+ SE(P->getAnalysis<ScalarEvolution>()),
+ DT(P->getAnalysis<DominatorTree>()),
+ LI(P->getAnalysis<LoopInfo>()),
+ TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
+
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm()) return;
+
+ // If there's no interesting work to be done, bail early.
+ if (IU.empty()) return;
+
+ DEBUG(dbgs() << "\nLSR on loop ";
+ WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
+ dbgs() << ":\n");
+
+ // First, perform some low-level loop optimizations.
+ OptimizeShadowIV();
+ OptimizeLoopTermCond();
+
+ // Start collecting data and preparing for the solver.
+ CollectInterestingTypesAndFactors();
+ CollectFixupsAndInitialFormulae();
+ CollectLoopInvariantFixupsAndFormulae();
+
+ DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
+ print_uses(dbgs()));
+
+ // Now use the reuse data to generate a bunch of interesting ways
+ // to formulate the values needed for the uses.
+ GenerateAllReuseFormulae();
+
+ FilterOutUndesirableDedicatedRegisters();
+ NarrowSearchSpaceUsingHeuristics();
+
+ SmallVector<const Formula *, 8> Solution;
+ Solve(Solution);
+
+ // Release memory that is no longer needed.
+ Factors.clear();
+ Types.clear();
+ RegUses.clear();
+
+#ifndef NDEBUG
+ // Formulae should be legal.
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ const LSRUse &LU = *I;
+ for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+ JE = LU.Formulae.end(); J != JE; ++J)
+ assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI) &&
+ "Illegal formula generated!");
+ };
+#endif
+
+ // Now that we've decided what we want, make it so.
+ ImplementSolution(Solution, P);
+}
+
+void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
+ if (Factors.empty() && Types.empty()) return;
+
+ OS << "LSR has identified the following interesting factors and types: ";
+ bool First = true;
+
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ if (!First) OS << ", ";
+ First = false;
+ OS << '*' << *I;
+ }
+
+ for (SmallSetVector<const Type *, 4>::const_iterator
+ I = Types.begin(), E = Types.end(); I != E; ++I) {
+ if (!First) OS << ", ";
+ First = false;
+ OS << '(' << **I << ')';
+ }
+ OS << '\n';
+}
+
+void LSRInstance::print_fixups(raw_ostream &OS) const {
+ OS << "LSR is examining the following fixup sites:\n";
+ for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ dbgs() << " ";
+ I->print(OS);
+ OS << '\n';
+ }
+}
+
+void LSRInstance::print_uses(raw_ostream &OS) const {
+ OS << "LSR is examining the following uses:\n";
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ const LSRUse &LU = *I;
+ dbgs() << " ";
+ LU.print(OS);
+ OS << '\n';
+ for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+ JE = LU.Formulae.end(); J != JE; ++J) {
+ OS << " ";
+ J->print(OS);
+ OS << '\n';
+ }
+ }
+}
+
+void LSRInstance::print(raw_ostream &OS) const {
+ print_factors_and_types(OS);
+ print_fixups(OS);
+ print_uses(OS);
+}
+
+void LSRInstance::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+namespace {
+
+class LoopStrengthReduce : public LoopPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// transformation profitability.
+ const TargetLowering *const TLI;
+
+public:
+ static char ID; // Pass ID, replacement for typeid
+ explicit LoopStrengthReduce(const TargetLowering *tli = 0);
+
+private:
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+};
+
+}
+
+char LoopStrengthReduce::ID = 0;
+INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false);
+
+Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
+ return new LoopStrengthReduce(TLI);
+}
+
+LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
+ : LoopPass(ID), TLI(tli) {}
+
+void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
+ // We split critical edges, so we change the CFG. However, we do update
+ // many analyses if they are around.
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved("domfrontier");
+
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<IVUsers>();
+ AU.addPreserved<IVUsers>();
+}
+
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
+ bool Changed = false;
+
+ // Run the main LSR transformation.
+ Changed |= LSRInstance(TLI, L, this).getChanged();
+
+ // At this point, it is worth checking to see if any recurrence PHIs are also
+ // dead, so that we can remove them as well.
+ Changed |= DeleteDeadPHIs(L->getHeader());
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
new file mode 100644
index 0000000..d0edfa2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -0,0 +1,153 @@
+//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a simple loop unroller. It works best when loops have
+// been canonicalized by the -indvars pass, allowing it to determine the trip
+// counts of loops easily.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <climits>
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+UnrollThreshold("unroll-threshold", cl::init(200), cl::Hidden,
+ cl::desc("The cut-off point for automatic loop unrolling"));
+
+static cl::opt<unsigned>
+UnrollCount("unroll-count", cl::init(0), cl::Hidden,
+ cl::desc("Use this unroll count for all loops, for testing purposes"));
+
+static cl::opt<bool>
+UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
+ cl::desc("Allows loops to be partially unrolled until "
+ "-unroll-threshold loop size is reached."));
+
+namespace {
+ class LoopUnroll : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopUnroll() : LoopPass(ID) {}
+
+ /// A magic value for use with the Threshold parameter to indicate
+ /// that the loop unroll should be performed regardless of how much
+ /// code expansion would result.
+ static const unsigned NoThreshold = UINT_MAX;
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<ScalarEvolution>();
+ // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
+ // If loop unroll does not preserve dom info then LCSSA pass on next
+ // loop will receive invalid dom info.
+ // For now, recreate dom info, if loop is unrolled.
+ AU.addPreserved<DominatorTree>();
+ }
+ };
+}
+
+char LoopUnroll::ID = 0;
+INITIALIZE_PASS(LoopUnroll, "loop-unroll", "Unroll loops", false, false);
+
+Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
+
+/// ApproximateLoopSize - Approximate the size of the loop.
+static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ Metrics.analyzeBasicBlock(*I);
+ NumCalls = Metrics.NumCalls;
+ return Metrics.NumInsts;
+}
+
+bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+
+ BasicBlock *Header = L->getHeader();
+ DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
+ << "] Loop %" << Header->getName() << "\n");
+ (void)Header;
+
+ // Find trip count
+ unsigned TripCount = L->getSmallConstantTripCount();
+ unsigned Count = UnrollCount;
+
+ // Automatically select an unroll count.
+ if (Count == 0) {
+ // Conservative heuristic: if we know the trip count, see if we can
+ // completely unroll (subject to the threshold, checked below); otherwise
+ // try to find greatest modulo of the trip count which is still under
+ // threshold value.
+ if (TripCount == 0)
+ return false;
+ Count = TripCount;
+ }
+
+ // Enforce the threshold.
+ if (UnrollThreshold != NoThreshold) {
+ unsigned NumCalls;
+ unsigned LoopSize = ApproximateLoopSize(L, NumCalls);
+ DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ if (NumCalls != 0) {
+ DEBUG(dbgs() << " Not unrolling loop with function calls.\n");
+ return false;
+ }
+ uint64_t Size = (uint64_t)LoopSize*Count;
+ if (TripCount != 1 && Size > UnrollThreshold) {
+ DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
+ << " because size: " << Size << ">" << UnrollThreshold << "\n");
+ if (!UnrollAllowPartial) {
+ DEBUG(dbgs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
+ return false;
+ }
+ // Reduce unroll count to be modulo of TripCount for partial unrolling
+ Count = UnrollThreshold / LoopSize;
+ while (Count != 0 && TripCount%Count != 0) {
+ Count--;
+ }
+ if (Count < 2) {
+ DEBUG(dbgs() << " could not unroll partially\n");
+ return false;
+ }
+ DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n");
+ }
+ }
+
+ // Unroll the loop.
+ Function *F = L->getHeader()->getParent();
+ if (!UnrollLoop(L, Count, LI, &LPM))
+ return false;
+
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+ DT->runOnFunction(*F);
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
new file mode 100644
index 0000000..9afe428
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -0,0 +1,1051 @@
+//===-- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops that contain branches on loop-invariant conditions
+// to have multiple loops. For example, it turns the left into the right code:
+//
+// for (...) if (lic)
+// A for (...)
+// if (lic) A; B; C
+// B else
+// C for (...)
+// A; C
+//
+// This can increase the size of the code exponentially (doubling it every time
+// a loop is unswitched) so we only unswitch if the resultant code will be
+// smaller than a threshold.
+//
+// This pass expects LICM to be run before it to hoist invariant conditions out
+// of the loop, to make the unswitching opportunity obvious.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unswitch"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumBranches, "Number of branches unswitched");
+STATISTIC(NumSwitches, "Number of switches unswitched");
+STATISTIC(NumSelects , "Number of selects unswitched");
+STATISTIC(NumTrivial , "Number of unswitches that are trivial");
+STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
+
+// The specific value of 50 here was chosen based only on intuition and a
+// few specific examples.
+static cl::opt<unsigned>
+Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
+ cl::init(50), cl::Hidden);
+
+namespace {
+ class LoopUnswitch : public LoopPass {
+ LoopInfo *LI; // Loop information
+ LPPassManager *LPM;
+
+ // LoopProcessWorklist - Used to check if second loop needs processing
+ // after RewriteLoopBodyWithConditionConstant rewrites first loop.
+ std::vector<Loop*> LoopProcessWorklist;
+ SmallPtrSet<Value *,8> UnswitchedVals;
+
+ bool OptimizeForSize;
+ bool redoLoop;
+
+ Loop *currentLoop;
+ DominatorTree *DT;
+ BasicBlock *loopHeader;
+ BasicBlock *loopPreheader;
+
+ // LoopBlocks contains all of the basic blocks of the loop, including the
+ // preheader of the loop, the body of the loop, and the exit blocks of the
+ // loop, in that order.
+ std::vector<BasicBlock*> LoopBlocks;
+ // NewBlocks contained cloned copy of basic blocks from LoopBlocks.
+ std::vector<BasicBlock*> NewBlocks;
+
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ explicit LoopUnswitch(bool Os = false) :
+ LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
+ currentLoop(NULL), DT(NULL), loopHeader(NULL),
+ loopPreheader(NULL) {}
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool processCurrentLoop();
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<DominatorTree>();
+ }
+
+ private:
+
+ virtual void releaseMemory() {
+ UnswitchedVals.clear();
+ }
+
+ /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
+ /// remove it.
+ void RemoveLoopFromWorklist(Loop *L) {
+ std::vector<Loop*>::iterator I = std::find(LoopProcessWorklist.begin(),
+ LoopProcessWorklist.end(), L);
+ if (I != LoopProcessWorklist.end())
+ LoopProcessWorklist.erase(I);
+ }
+
+ void initLoopData() {
+ loopHeader = currentLoop->getHeader();
+ loopPreheader = currentLoop->getLoopPreheader();
+ }
+
+ /// Split all of the edges from inside the loop to their exit blocks.
+ /// Update the appropriate Phi nodes as we do so.
+ void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks);
+
+ bool UnswitchIfProfitable(Value *LoopCond, Constant *Val);
+ void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
+ BasicBlock *ExitBlock);
+ void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L);
+
+ void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
+ Constant *Val, bool isEqual);
+
+ void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
+ BasicBlock *TrueDest,
+ BasicBlock *FalseDest,
+ Instruction *InsertPt);
+
+ void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
+ void RemoveBlockIfDead(BasicBlock *BB,
+ std::vector<Instruction*> &Worklist, Loop *l);
+ void RemoveLoopFromHierarchy(Loop *L);
+ bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0,
+ BasicBlock **LoopExit = 0);
+
+ };
+}
+char LoopUnswitch::ID = 0;
+INITIALIZE_PASS(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false);
+
+Pass *llvm::createLoopUnswitchPass(bool Os) {
+ return new LoopUnswitch(Os);
+}
+
+/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is
+/// invariant in the loop, or has an invariant piece, return the invariant.
+/// Otherwise, return null.
+static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
+ // We can never unswitch on vector conditions.
+ if (Cond->getType()->isVectorTy())
+ return 0;
+
+ // Constants should be folded, not unswitched on!
+ if (isa<Constant>(Cond)) return 0;
+
+ // TODO: Handle: br (VARIANT|INVARIANT).
+
+ // Hoist simple values out.
+ if (L->makeLoopInvariant(Cond, Changed))
+ return Cond;
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
+ if (BO->getOpcode() == Instruction::And ||
+ BO->getOpcode() == Instruction::Or) {
+ // If either the left or right side is invariant, we can unswitch on this,
+ // which will cause the branch to go away in one loop and the condition to
+ // simplify in the other one.
+ if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed))
+ return LHS;
+ if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed))
+ return RHS;
+ }
+
+ return 0;
+}
+
+bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
+ LI = &getAnalysis<LoopInfo>();
+ LPM = &LPM_Ref;
+ DT = getAnalysisIfAvailable<DominatorTree>();
+ currentLoop = L;
+ Function *F = currentLoop->getHeader()->getParent();
+ bool Changed = false;
+ do {
+ assert(currentLoop->isLCSSAForm(*DT));
+ redoLoop = false;
+ Changed |= processCurrentLoop();
+ } while(redoLoop);
+
+ if (Changed) {
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ if (DT)
+ DT->runOnFunction(*F);
+ }
+ return Changed;
+}
+
+/// processCurrentLoop - Do actual work and unswitch loop if possible
+/// and profitable.
+bool LoopUnswitch::processCurrentLoop() {
+ bool Changed = false;
+ LLVMContext &Context = currentLoop->getHeader()->getContext();
+
+ // Loop over all of the basic blocks in the loop. If we find an interior
+ // block that is branching on a loop-invariant condition, we can unswitch this
+ // loop.
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end(); I != E; ++I) {
+ TerminatorInst *TI = (*I)->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ // If this isn't branching on an invariant condition, we can't unswitch
+ // it.
+ if (BI->isConditional()) {
+ // See if this, or some part of it, is loop invariant. If so, we can
+ // unswitch on it if we desire.
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
+ ConstantInt::getTrue(Context))) {
+ ++NumBranches;
+ return true;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && SI->getNumCases() > 1) {
+ // Find a value to unswitch on:
+ // FIXME: this should chose the most expensive case!
+ Constant *UnswitchVal = SI->getCaseValue(1);
+ // Do not process same value again and again.
+ if (!UnswitchedVals.insert(UnswitchVal))
+ continue;
+
+ if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
+ ++NumSwitches;
+ return true;
+ }
+ }
+ }
+
+ // Scan the instructions to check for unswitchable values.
+ for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
+ BBI != E; ++BBI)
+ if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
+ ConstantInt::getTrue(Context))) {
+ ++NumSelects;
+ return true;
+ }
+ }
+ }
+ return Changed;
+}
+
+/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
+/// loop with no side effects (including infinite loops).
+///
+/// If true, we return true and set ExitBB to the block we
+/// exit through.
+///
+static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
+ BasicBlock *&ExitBB,
+ std::set<BasicBlock*> &Visited) {
+ if (!Visited.insert(BB).second) {
+ // Already visited. Without more analysis, this could indicate an infinte loop.
+ return false;
+ } else if (!L->contains(BB)) {
+ // Otherwise, this is a loop exit, this is fine so long as this is the
+ // first exit.
+ if (ExitBB != 0) return false;
+ ExitBB = BB;
+ return true;
+ }
+
+ // Otherwise, this is an unvisited intra-loop node. Check all successors.
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
+ // Check to see if the successor is a trivial loop exit.
+ if (!isTrivialLoopExitBlockHelper(L, *SI, ExitBB, Visited))
+ return false;
+ }
+
+ // Okay, everything after this looks good, check to make sure that this block
+ // doesn't include any side effects.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (I->mayHaveSideEffects())
+ return false;
+
+ return true;
+}
+
+/// isTrivialLoopExitBlock - Return true if the specified block unconditionally
+/// leads to an exit from the specified loop, and has no side-effects in the
+/// process. If so, return the block that is exited to, otherwise return null.
+static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
+ std::set<BasicBlock*> Visited;
+ Visited.insert(L->getHeader()); // Branches to header make infinite loops.
+ BasicBlock *ExitBB = 0;
+ if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
+ return ExitBB;
+ return 0;
+}
+
+/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
+/// trivial: that is, that the condition controls whether or not the loop does
+/// anything at all. If this is a trivial condition, unswitching produces no
+/// code duplications (equivalently, it produces a simpler loop and a new empty
+/// loop, which gets deleted).
+///
+/// If this is a trivial condition, return true, otherwise return false. When
+/// returning true, this sets Cond and Val to the condition that controls the
+/// trivial condition: when Cond dynamically equals Val, the loop is known to
+/// exit. Finally, this sets LoopExit to the BB that the loop exits to when
+/// Cond == Val.
+///
+bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
+ BasicBlock **LoopExit) {
+ BasicBlock *Header = currentLoop->getHeader();
+ TerminatorInst *HeaderTerm = Header->getTerminator();
+ LLVMContext &Context = Header->getContext();
+
+ BasicBlock *LoopExitBB = 0;
+ if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
+ // If the header block doesn't end with a conditional branch on Cond, we
+ // can't handle it.
+ if (!BI->isConditional() || BI->getCondition() != Cond)
+ return false;
+
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
+ // side-effects. If so, determine the value of Cond that causes it to do
+ // this.
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(0)))) {
+ if (Val) *Val = ConstantInt::getTrue(Context);
+ } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(1)))) {
+ if (Val) *Val = ConstantInt::getFalse(Context);
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
+ // If this isn't a switch on Cond, we can't handle it.
+ if (SI->getCondition() != Cond) return false;
+
+ // Check to see if a successor of the switch is guaranteed to go to the
+ // latch block or exit through a one exit block without having any
+ // side-effects. If so, determine the value of Cond that causes it to do
+ // this. Note that we can't trivially unswitch on the default case.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ SI->getSuccessor(i)))) {
+ // Okay, we found a trivial case, remember the value that is trivial.
+ if (Val) *Val = SI->getCaseValue(i);
+ break;
+ }
+ }
+
+ // If we didn't find a single unique LoopExit block, or if the loop exit block
+ // contains phi nodes, this isn't trivial.
+ if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
+ return false; // Can't handle this.
+
+ if (LoopExit) *LoopExit = LoopExitBB;
+
+ // We already know that nothing uses any scalar values defined inside of this
+ // loop. As such, we just have to check to see if this loop will execute any
+ // side-effecting instructions (e.g. stores, calls, volatile loads) in the
+ // part of the loop that the code *would* execute. We already checked the
+ // tail, check the header now.
+ for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I)
+ if (I->mayHaveSideEffects())
+ return false;
+ return true;
+}
+
+/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
+/// LoopCond == Val to simplify the loop. If we decide that this is profitable,
+/// unswitch the loop, reprocess the pieces, then return true.
+bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
+
+ initLoopData();
+
+ // If LoopSimplify was unable to form a preheader, don't do any unswitching.
+ if (!loopPreheader)
+ return false;
+
+ Function *F = loopHeader->getParent();
+
+ Constant *CondVal = 0;
+ BasicBlock *ExitBlock = 0;
+ if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
+ // If the condition is trivial, always unswitch. There is no code growth
+ // for this case.
+ UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock);
+ return true;
+ }
+
+ // Check to see if it would be profitable to unswitch current loop.
+
+ // Do not do non-trivial unswitch while optimizing for size.
+ if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize))
+ return false;
+
+ // FIXME: This is overly conservative because it does not take into
+ // consideration code simplification opportunities and code that can
+ // be shared by the resultant unswitched loops.
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end();
+ I != E; ++I)
+ Metrics.analyzeBasicBlock(*I);
+
+ // Limit the number of instructions to avoid causing significant code
+ // expansion, and the number of basic blocks, to avoid loops with
+ // large numbers of branches which cause loop unswitching to go crazy.
+ // This is a very ad-hoc heuristic.
+ if (Metrics.NumInsts > Threshold ||
+ Metrics.NumBlocks * 5 > Threshold ||
+ Metrics.containsIndirectBr || Metrics.isRecursive) {
+ DEBUG(dbgs() << "NOT unswitching loop %"
+ << currentLoop->getHeader()->getName() << ", cost too high: "
+ << currentLoop->getBlocks().size() << "\n");
+ return false;
+ }
+
+ UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
+ return true;
+}
+
+// RemapInstruction - Convert the instruction operands from referencing the
+// current values into those specified by VMap.
+//
+static inline void RemapInstruction(Instruction *I,
+ ValueMap<const Value *, Value*> &VMap) {
+ for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
+ Value *Op = I->getOperand(op);
+ ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+ if (It != VMap.end()) Op = It->second;
+ I->setOperand(op, Op);
+ }
+}
+
+/// CloneLoop - Recursively clone the specified loop and all of its children,
+/// mapping the blocks with the specified map.
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap<const Value*, Value*> &VM,
+ LoopInfo *LI, LPPassManager *LPM) {
+ Loop *New = new Loop();
+ LPM->insertLoop(New, PL);
+
+ // Add all of the blocks in L to the new loop.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ if (LI->getLoopFor(*I) == L)
+ New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), LI->getBase());
+
+ // Add all of the subloops to the new loop.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ CloneLoop(*I, New, VM, LI, LPM);
+
+ return New;
+}
+
+/// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values
+/// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest. Insert the
+/// code immediately before InsertPt.
+void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
+ BasicBlock *TrueDest,
+ BasicBlock *FalseDest,
+ Instruction *InsertPt) {
+ // Insert a conditional branch on LIC to the two preheaders. The original
+ // code is the true version and the new code is the false version.
+ Value *BranchVal = LIC;
+ if (!isa<ConstantInt>(Val) ||
+ Val->getType() != Type::getInt1Ty(LIC->getContext()))
+ BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val, "tmp");
+ else if (Val != ConstantInt::getTrue(Val->getContext()))
+ // We want to enter the new loop when the condition is true.
+ std::swap(TrueDest, FalseDest);
+
+ // Insert the new branch.
+ BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
+
+ // If either edge is critical, split it. This helps preserve LoopSimplify
+ // form for enclosing loops.
+ SplitCriticalEdge(BI, 0, this);
+ SplitCriticalEdge(BI, 1, this);
+}
+
+/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
+/// condition in it (a cond branch from its header block to its latch block,
+/// where the path through the loop that doesn't execute its body has no
+/// side-effects), unswitch it. This doesn't involve any code duplication, just
+/// moving the conditional branch outside of the loop and updating loop info.
+void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
+ Constant *Val,
+ BasicBlock *ExitBlock) {
+ DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
+ << loopHeader->getName() << " [" << L->getBlocks().size()
+ << " blocks] in Function " << L->getHeader()->getParent()->getName()
+ << " on cond: " << *Val << " == " << *Cond << "\n");
+
+ // First step, split the preheader, so that we know that there is a safe place
+ // to insert the conditional branch. We will change loopPreheader to have a
+ // conditional branch on Cond.
+ BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, this);
+
+ // Now that we have a place to insert the conditional branch, create a place
+ // to branch to: this is the exit block out of the loop that we should
+ // short-circuit to.
+
+ // Split this block now, so that the loop maintains its exit block, and so
+ // that the jump from the preheader can execute the contents of the exit block
+ // without actually branching to it (the exit block should be dominated by the
+ // loop header, not the preheader).
+ assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
+ BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this);
+
+ // Okay, now we have a position to branch from and a position to branch to,
+ // insert the new conditional branch.
+ EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
+ loopPreheader->getTerminator());
+ LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L);
+ loopPreheader->getTerminator()->eraseFromParent();
+
+ // We need to reprocess this loop, it could be unswitched again.
+ redoLoop = true;
+
+ // Now that we know that the loop is never entered when this condition is a
+ // particular value, rewrite the loop with this info. We know that this will
+ // at least eliminate the old branch.
+ RewriteLoopBodyWithConditionConstant(L, Cond, Val, false);
+ ++NumTrivial;
+}
+
+/// SplitExitEdges - Split all of the edges from inside the loop to their exit
+/// blocks. Update the appropriate Phi nodes as we do so.
+void LoopUnswitch::SplitExitEdges(Loop *L,
+ const SmallVector<BasicBlock *, 8> &ExitBlocks){
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+ SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
+ pred_end(ExitBlock));
+ SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
+ ".us-lcssa", this);
+ }
+}
+
+/// UnswitchNontrivialCondition - We determined that the loop is profitable
+/// to unswitch when LIC equal Val. Split it into loop versions and test the
+/// condition outside of either loop. Return the loops created as Out1/Out2.
+void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
+ Loop *L) {
+ Function *F = loopHeader->getParent();
+ DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
+ << loopHeader->getName() << " [" << L->getBlocks().size()
+ << " blocks] in Function " << F->getName()
+ << " when '" << *Val << "' == " << *LIC << "\n");
+
+ LoopBlocks.clear();
+ NewBlocks.clear();
+
+ // First step, split the preheader and exit blocks, and add these blocks to
+ // the LoopBlocks list.
+ BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
+ LoopBlocks.push_back(NewPreheader);
+
+ // We want the loop to come after the preheader, but before the exit blocks.
+ LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // Split all of the edges from inside the loop to their exit blocks. Update
+ // the appropriate Phi nodes as we do so.
+ SplitExitEdges(L, ExitBlocks);
+
+ // The exit blocks may have been changed due to edge splitting, recompute.
+ ExitBlocks.clear();
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // Add exit blocks to the loop blocks.
+ LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());
+
+ // Next step, clone all of the basic blocks that make up the loop (including
+ // the loop preheader and exit blocks), keeping track of the mapping between
+ // the instructions and blocks.
+ NewBlocks.reserve(LoopBlocks.size());
+ ValueMap<const Value*, Value*> VMap;
+ for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
+ BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
+ NewBlocks.push_back(NewBB);
+ VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
+ LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
+ }
+
+ // Splice the newly inserted blocks into the function right before the
+ // original preheader.
+ F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
+ NewBlocks[0], F->end());
+
+ // Now we create the new Loop object for the versioned loop.
+ Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
+ Loop *ParentLoop = L->getParentLoop();
+ if (ParentLoop) {
+ // Make sure to add the cloned preheader and exit blocks to the parent loop
+ // as well.
+ ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
+ }
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
+ // The new exit block should be in the same loop as the old one.
+ if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
+ ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
+
+ assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
+ "Exit block should have been split to have one successor!");
+ BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
+
+ // If the successor of the exit block had PHI nodes, add an entry for
+ // NewExit.
+ PHINode *PN;
+ for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
+ PN = cast<PHINode>(I);
+ Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
+ ValueMap<const Value *, Value*>::iterator It = VMap.find(V);
+ if (It != VMap.end()) V = It->second;
+ PN->addIncoming(V, NewExit);
+ }
+ }
+
+ // Rewrite the code to refer to itself.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I)
+ RemapInstruction(I, VMap);
+
+ // Rewrite the original preheader to select between versions of the loop.
+ BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
+ assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
+ "Preheader splitting did not work correctly!");
+
+ // Emit the new branch that selects between the two versions of this loop.
+ EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR);
+ LPM->deleteSimpleAnalysisValue(OldBR, L);
+ OldBR->eraseFromParent();
+
+ LoopProcessWorklist.push_back(NewLoop);
+ redoLoop = true;
+
+ // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody
+ // deletes the instruction (for example by simplifying a PHI that feeds into
+ // the condition that we're unswitching on), we don't rewrite the second
+ // iteration.
+ WeakVH LICHandle(LIC);
+
+ // Now we rewrite the original code to know that the condition is true and the
+ // new code to know that the condition is false.
+ RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);
+
+ // It's possible that simplifying one loop could cause the other to be
+ // changed to another value or a constant. If its a constant, don't simplify
+ // it.
+ if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
+ LICHandle && !isa<Constant>(LICHandle))
+ RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
+}
+
+/// RemoveFromWorklist - Remove all instances of I from the worklist vector
+/// specified.
+static void RemoveFromWorklist(Instruction *I,
+ std::vector<Instruction*> &Worklist) {
+ std::vector<Instruction*>::iterator WI = std::find(Worklist.begin(),
+ Worklist.end(), I);
+ while (WI != Worklist.end()) {
+ unsigned Offset = WI-Worklist.begin();
+ Worklist.erase(WI);
+ WI = std::find(Worklist.begin()+Offset, Worklist.end(), I);
+ }
+}
+
+/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the
+/// program, replacing all uses with V and update the worklist.
+static void ReplaceUsesOfWith(Instruction *I, Value *V,
+ std::vector<Instruction*> &Worklist,
+ Loop *L, LPPassManager *LPM) {
+ DEBUG(dbgs() << "Replace with '" << *V << "': " << *I);
+
+ // Add uses to the worklist, which may be dead now.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
+ Worklist.push_back(Use);
+
+ // Add users to the worklist which may be simplified now.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ Worklist.push_back(cast<Instruction>(*UI));
+ LPM->deleteSimpleAnalysisValue(I, L);
+ RemoveFromWorklist(I, Worklist);
+ I->replaceAllUsesWith(V);
+ I->eraseFromParent();
+ ++NumSimplify;
+}
+
+/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop
+/// information, and remove any dead successors it has.
+///
+void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
+ std::vector<Instruction*> &Worklist,
+ Loop *L) {
+ if (pred_begin(BB) != pred_end(BB)) {
+ // This block isn't dead, since an edge to BB was just removed, see if there
+ // are any easy simplifications we can do now.
+ if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+ // If it has one pred, fold phi nodes in BB.
+ while (isa<PHINode>(BB->begin()))
+ ReplaceUsesOfWith(BB->begin(),
+ cast<PHINode>(BB->begin())->getIncomingValue(0),
+ Worklist, L, LPM);
+
+ // If this is the header of a loop and the only pred is the latch, we now
+ // have an unreachable loop.
+ if (Loop *L = LI->getLoopFor(BB))
+ if (loopHeader == BB && L->contains(Pred)) {
+ // Remove the branch from the latch to the header block, this makes
+ // the header dead, which will make the latch dead (because the header
+ // dominates the latch).
+ LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
+ Pred->getTerminator()->eraseFromParent();
+ new UnreachableInst(BB->getContext(), Pred);
+
+ // The loop is now broken, remove it from LI.
+ RemoveLoopFromHierarchy(L);
+
+ // Reprocess the header, which now IS dead.
+ RemoveBlockIfDead(BB, Worklist, L);
+ return;
+ }
+
+ // If pred ends in a uncond branch, add uncond branch to worklist so that
+ // the two blocks will get merged.
+ if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
+ if (BI->isUnconditional())
+ Worklist.push_back(BI);
+ }
+ return;
+ }
+
+ DEBUG(dbgs() << "Nuking dead block: " << *BB);
+
+ // Remove the instructions in the basic block from the worklist.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ RemoveFromWorklist(I, Worklist);
+
+ // Anything that uses the instructions in this basic block should have their
+ // uses replaced with undefs.
+ // If I is not void type then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust itself.
+ if (!I->getType()->isVoidTy())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ }
+
+ // If this is the edge to the header block for a loop, remove the loop and
+ // promote all subloops.
+ if (Loop *BBLoop = LI->getLoopFor(BB)) {
+ if (BBLoop->getLoopLatch() == BB)
+ RemoveLoopFromHierarchy(BBLoop);
+ }
+
+ // Remove the block from the loop info, which removes it from any loops it
+ // was in.
+ LI->removeBlock(BB);
+
+
+ // Remove phi node entries in successors for this block.
+ TerminatorInst *TI = BB->getTerminator();
+ SmallVector<BasicBlock*, 4> Succs;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ Succs.push_back(TI->getSuccessor(i));
+ TI->getSuccessor(i)->removePredecessor(BB);
+ }
+
+ // Unique the successors, remove anything with multiple uses.
+ array_pod_sort(Succs.begin(), Succs.end());
+ Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());
+
+ // Remove the basic block, including all of the instructions contained in it.
+ LPM->deleteSimpleAnalysisValue(BB, L);
+ BB->eraseFromParent();
+ // Remove successor blocks here that are not dead, so that we know we only
+ // have dead blocks in this list. Nondead blocks have a way of becoming dead,
+ // then getting removed before we revisit them, which is badness.
+ //
+ for (unsigned i = 0; i != Succs.size(); ++i)
+ if (pred_begin(Succs[i]) != pred_end(Succs[i])) {
+ // One exception is loop headers. If this block was the preheader for a
+ // loop, then we DO want to visit the loop so the loop gets deleted.
+ // We know that if the successor is a loop header, that this loop had to
+ // be the preheader: the case where this was the latch block was handled
+ // above and headers can only have two predecessors.
+ if (!LI->isLoopHeader(Succs[i])) {
+ Succs.erase(Succs.begin()+i);
+ --i;
+ }
+ }
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ RemoveBlockIfDead(Succs[i], Worklist, L);
+}
+
+/// RemoveLoopFromHierarchy - We have discovered that the specified loop has
+/// become unwrapped, either because the backedge was deleted, or because the
+/// edge into the header was removed. If the edge into the header from the
+/// latch block was removed, the loop is unwrapped but subloops are still alive,
+/// so they just reparent loops. If the loops are actually dead, they will be
+/// removed later.
+void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) {
+ LPM->deleteLoopFromQueue(L);
+ RemoveLoopFromWorklist(L);
+}
+
+// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
+// the value specified by Val in the specified loop, or we know it does NOT have
+// that value. Rewrite any uses of LIC or of properties correlated to it.
+void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
+ Constant *Val,
+ bool IsEqual) {
+ assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
+
+ // FIXME: Support correlated properties, like:
+ // for (...)
+ // if (li1 < li2)
+ // ...
+ // if (li1 > li2)
+ // ...
+
+ // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches,
+ // selects, switches.
+ std::vector<User*> Users(LIC->use_begin(), LIC->use_end());
+ std::vector<Instruction*> Worklist;
+ LLVMContext &Context = Val->getContext();
+
+
+ // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
+ // in the loop with the appropriate one directly.
+ if (IsEqual || (isa<ConstantInt>(Val) &&
+ Val->getType()->isIntegerTy(1))) {
+ Value *Replacement;
+ if (IsEqual)
+ Replacement = Val;
+ else
+ Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
+ !cast<ConstantInt>(Val)->getZExtValue());
+
+ for (unsigned i = 0, e = Users.size(); i != e; ++i)
+ if (Instruction *U = cast<Instruction>(Users[i])) {
+ if (!L->contains(U))
+ continue;
+ U->replaceUsesOfWith(LIC, Replacement);
+ Worklist.push_back(U);
+ }
+ SimplifyCode(Worklist, L);
+ return;
+ }
+
+ // Otherwise, we don't know the precise value of LIC, but we do know that it
+ // is certainly NOT "Val". As such, simplify any uses in the loop that we
+ // can. This case occurs when we unswitch switch statements.
+ for (unsigned i = 0, e = Users.size(); i != e; ++i) {
+ Instruction *U = cast<Instruction>(Users[i]);
+ if (!L->contains(U))
+ continue;
+
+ Worklist.push_back(U);
+
+ // TODO: We could do other simplifications, for example, turning
+ // 'icmp eq LIC, Val' -> false.
+
+ // If we know that LIC is not Val, use this info to simplify code.
+ SwitchInst *SI = dyn_cast<SwitchInst>(U);
+ if (SI == 0 || !isa<ConstantInt>(Val)) continue;
+
+ unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
+ if (DeadCase == 0) continue; // Default case is live for multiple values.
+
+ // Found a dead case value. Don't remove PHI nodes in the
+ // successor if they become single-entry, those PHI nodes may
+ // be in the Users list.
+
+ // FIXME: This is a hack. We need to keep the successor around
+ // and hooked up so as to preserve the loop structure, because
+ // trying to update it is complicated. So instead we preserve the
+ // loop structure and put the block on a dead code path.
+ BasicBlock *Switch = SI->getParent();
+ SplitEdge(Switch, SI->getSuccessor(DeadCase), this);
+ // Compute the successors instead of relying on the return value
+ // of SplitEdge, since it may have split the switch successor
+ // after PHI nodes.
+ BasicBlock *NewSISucc = SI->getSuccessor(DeadCase);
+ BasicBlock *OldSISucc = *succ_begin(NewSISucc);
+ // Create an "unreachable" destination.
+ BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
+ Switch->getParent(),
+ OldSISucc);
+ new UnreachableInst(Context, Abort);
+ // Force the new case destination to branch to the "unreachable"
+ // block while maintaining a (dead) CFG edge to the old block.
+ NewSISucc->getTerminator()->eraseFromParent();
+ BranchInst::Create(Abort, OldSISucc,
+ ConstantInt::getTrue(Context), NewSISucc);
+ // Release the PHI operands for this edge.
+ for (BasicBlock::iterator II = NewSISucc->begin();
+ PHINode *PN = dyn_cast<PHINode>(II); ++II)
+ PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
+ UndefValue::get(PN->getType()));
+ // Tell the domtree about the new block. We don't fully update the
+ // domtree here -- instead we force it to do a full recomputation
+ // after the pass is complete -- but we do need to inform it of
+ // new blocks.
+ if (DT)
+ DT->addNewBlock(Abort, NewSISucc);
+ }
+
+ SimplifyCode(Worklist, L);
+}
+
+/// SimplifyCode - Okay, now that we have simplified some instructions in the
+/// loop, walk over it and constant prop, dce, and fold control flow where
+/// possible. Note that this is effectively a very simple loop-structure-aware
+/// optimizer. During processing of this loop, L could very well be deleted, so
+/// it must not be used.
+///
+/// FIXME: When the loop optimizer is more mature, separate this out to a new
+/// pass.
+///
+void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+
+ // Simple constant folding.
+ if (Constant *C = ConstantFoldInstruction(I)) {
+ ReplaceUsesOfWith(I, C, Worklist, L, LPM);
+ continue;
+ }
+
+ // Simple DCE.
+ if (isInstructionTriviallyDead(I)) {
+ DEBUG(dbgs() << "Remove dead instruction '" << *I);
+
+ // Add uses to the worklist, which may be dead now.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
+ Worklist.push_back(Use);
+ LPM->deleteSimpleAnalysisValue(I, L);
+ RemoveFromWorklist(I, Worklist);
+ I->eraseFromParent();
+ ++NumSimplify;
+ continue;
+ }
+
+ // See if instruction simplification can hack this up. This is common for
+ // things like "select false, X, Y" after unswitching made the condition be
+ // 'false'.
+ if (Value *V = SimplifyInstruction(I)) {
+ ReplaceUsesOfWith(I, V, Worklist, L, LPM);
+ continue;
+ }
+
+ // Special case hacks that appear commonly in unswitched code.
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ if (BI->isUnconditional()) {
+ // If BI's parent is the only pred of the successor, fold the two blocks
+ // together.
+ BasicBlock *Pred = BI->getParent();
+ BasicBlock *Succ = BI->getSuccessor(0);
+ BasicBlock *SinglePred = Succ->getSinglePredecessor();
+ if (!SinglePred) continue; // Nothing to do.
+ assert(SinglePred == Pred && "CFG broken");
+
+ DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
+ << Succ->getName() << "\n");
+
+ // Resolve any single entry PHI nodes in Succ.
+ while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
+ ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
+
+ // Move all of the successor contents from Succ to Pred.
+ Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
+ Succ->end());
+ LPM->deleteSimpleAnalysisValue(BI, L);
+ BI->eraseFromParent();
+ RemoveFromWorklist(BI, Worklist);
+
+ // If Succ has any successors with PHI nodes, update them to have
+ // entries coming from Pred instead of Succ.
+ Succ->replaceAllUsesWith(Pred);
+
+ // Remove Succ from the loop tree.
+ LI->removeBlock(Succ);
+ LPM->deleteSimpleAnalysisValue(Succ, L);
+ Succ->eraseFromParent();
+ ++NumSimplify;
+ continue;
+ }
+
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
+ // Conditional branch. Turn it into an unconditional branch, then
+ // remove dead blocks.
+ continue; // FIXME: Enable.
+
+ DEBUG(dbgs() << "Folded branch: " << *BI);
+ BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
+ BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
+ DeadSucc->removePredecessor(BI->getParent(), true);
+ Worklist.push_back(BranchInst::Create(LiveSucc, BI));
+ LPM->deleteSimpleAnalysisValue(BI, L);
+ BI->eraseFromParent();
+ RemoveFromWorklist(BI, Worklist);
+ ++NumSimplify;
+
+ RemoveBlockIfDead(DeadSucc, Worklist, L);
+ }
+ continue;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
new file mode 100644
index 0000000..973ffe7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -0,0 +1,161 @@
+//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers atomic intrinsics to non-atomic form for use in a known
+// non-preemptible environment.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loweratomic"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+
+bool LowerAtomicIntrinsic(CallInst *CI) {
+ IRBuilder<> Builder(CI->getParent(), CI);
+
+ Function *Callee = CI->getCalledFunction();
+ if (!Callee)
+ return false;
+
+ unsigned IID = Callee->getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::memory_barrier:
+ break;
+
+ case Intrinsic::atomic_load_add:
+ case Intrinsic::atomic_load_sub:
+ case Intrinsic::atomic_load_and:
+ case Intrinsic::atomic_load_nand:
+ case Intrinsic::atomic_load_or:
+ case Intrinsic::atomic_load_xor:
+ case Intrinsic::atomic_load_max:
+ case Intrinsic::atomic_load_min:
+ case Intrinsic::atomic_load_umax:
+ case Intrinsic::atomic_load_umin: {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Delta = CI->getArgOperand(1);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Res = NULL;
+ switch (IID) {
+ default: assert(0 && "Unrecognized atomic modify operation");
+ case Intrinsic::atomic_load_add:
+ Res = Builder.CreateAdd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_sub:
+ Res = Builder.CreateSub(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_and:
+ Res = Builder.CreateAnd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_nand:
+ Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
+ break;
+ case Intrinsic::atomic_load_or:
+ Res = Builder.CreateOr(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_xor:
+ Res = Builder.CreateXor(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_max:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Delta,
+ Orig);
+ break;
+ case Intrinsic::atomic_load_min:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Orig,
+ Delta);
+ break;
+ case Intrinsic::atomic_load_umax:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Delta,
+ Orig);
+ break;
+ case Intrinsic::atomic_load_umin:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Orig,
+ Delta);
+ break;
+ }
+ Builder.CreateStore(Res, Ptr);
+
+ CI->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ case Intrinsic::atomic_swap: {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Val = CI->getArgOperand(1);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Builder.CreateStore(Val, Ptr);
+
+ CI->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ case Intrinsic::atomic_cmp_swap: {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Cmp = CI->getArgOperand(1);
+ Value *Val = CI->getArgOperand(2);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
+ Value *Res = Builder.CreateSelect(Equal, Val, Orig);
+ Builder.CreateStore(Res, Ptr);
+
+ CI->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ default:
+ return false;
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+
+ return true;
+}
+
+struct LowerAtomic : public BasicBlockPass {
+ static char ID;
+ LowerAtomic() : BasicBlockPass(ID) {}
+ bool runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
+ Instruction *Inst = DI++;
+ if (CallInst *CI = dyn_cast<CallInst>(Inst))
+ Changed |= LowerAtomicIntrinsic(CI);
+ }
+ return Changed;
+ }
+
+};
+
+}
+
+char LowerAtomic::ID = 0;
+INITIALIZE_PASS(LowerAtomic, "loweratomic",
+ "Lower atomic intrinsics to non-atomic form",
+ false, false);
+
+Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); }
diff --git a/contrib/llvm/lib/Transforms/Scalar/Makefile b/contrib/llvm/lib/Transforms/Scalar/Makefile
new file mode 100644
index 0000000..cc42fd0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/Scalar/Makefile ----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMScalarOpts
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
new file mode 100644
index 0000000..24fae42
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -0,0 +1,801 @@
+//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs various transformations related to eliminating memcpy
+// calls, or transforming sets of stores into memset's.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "memcpyopt"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include <list>
+using namespace llvm;
+
+STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
+STATISTIC(NumMemSetInfer, "Number of memsets inferred");
+STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
+
+/// isBytewiseValue - If the specified value can be set by repeating the same
+/// byte in memory, return the i8 value that it is represented with. This is
+/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
+/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
+/// byte store (e.g. i16 0x1234), return null.
+static Value *isBytewiseValue(Value *V) {
+ LLVMContext &Context = V->getContext();
+
+ // All byte-wide stores are splatable, even of arbitrary variables.
+ if (V->getType()->isIntegerTy(8)) return V;
+
+ // Constant float and double values can be handled as integer values if the
+ // corresponding integer value is "byteable". An important case is 0.0.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType()->isFloatTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(Context));
+ if (CFP->getType()->isDoubleTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(Context));
+ // Don't handle long double formats, which have strange constraints.
+ }
+
+ // We can handle constant integers that are power of two in size and a
+ // multiple of 8 bits.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ unsigned Width = CI->getBitWidth();
+ if (isPowerOf2_32(Width) && Width > 8) {
+ // We can handle this value if the recursive binary decomposition is the
+ // same at all levels.
+ APInt Val = CI->getValue();
+ APInt Val2;
+ while (Val.getBitWidth() != 8) {
+ unsigned NextWidth = Val.getBitWidth()/2;
+ Val2 = Val.lshr(NextWidth);
+ Val2.trunc(Val.getBitWidth()/2);
+ Val.trunc(Val.getBitWidth()/2);
+
+ // If the top/bottom halves aren't the same, reject it.
+ if (Val != Val2)
+ return 0;
+ }
+ return ConstantInt::get(Context, Val);
+ }
+ }
+
+ // Conceptually, we could handle things like:
+ // %a = zext i8 %X to i16
+ // %b = shl i16 %a, 8
+ // %c = or i16 %a, %b
+ // but until there is an example that actually needs this, it doesn't seem
+ // worth worrying about.
+ return 0;
+}
+
+static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
+ bool &VariableIdxFound, TargetData &TD) {
+ // Skip over the first indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (unsigned i = 1; i != Idx; ++i, ++GTI)
+ /*skip along*/;
+
+ // Compute the offset implied by the rest of the indices.
+ int64_t Offset = 0;
+ for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (OpC == 0)
+ return VariableIdxFound = true;
+ if (OpC->isZero()) continue; // No offset.
+
+ // Handle struct indices, which add their field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ continue;
+ }
+
+ // Otherwise, we have a sequential type like an array or vector. Multiply
+ // the index by the ElementSize.
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*OpC->getSExtValue();
+ }
+
+ return Offset;
+}
+
+/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
+/// constant offset, and return that constant offset. For example, Ptr1 might
+/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
+static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
+ TargetData &TD) {
+ // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
+ // base. After that base, they may have some number of common (and
+ // potentially variable) indices. After that they handle some constant
+ // offset, which determines their offset from each other. At this point, we
+ // handle no other case.
+ GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
+ GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
+ if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
+ return false;
+
+ // Skip any common indices and track the GEP types.
+ unsigned Idx = 1;
+ for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
+ if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
+ break;
+
+ bool VariableIdxFound = false;
+ int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
+ int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
+ if (VariableIdxFound) return false;
+
+ Offset = Offset2-Offset1;
+ return true;
+}
+
+
+/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
+/// This allows us to analyze stores like:
+/// store 0 -> P+1
+/// store 0 -> P+0
+/// store 0 -> P+3
+/// store 0 -> P+2
+/// which sometimes happens with stores to arrays of structs etc. When we see
+/// the first store, we make a range [1, 2). The second store extends the range
+/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
+/// two ranges into [0, 3) which is memset'able.
+namespace {
+struct MemsetRange {
+ // Start/End - A semi range that describes the span that this range covers.
+ // The range is closed at the start and open at the end: [Start, End).
+ int64_t Start, End;
+
+ /// StartPtr - The getelementptr instruction that points to the start of the
+ /// range.
+ Value *StartPtr;
+
+ /// Alignment - The known alignment of the first store.
+ unsigned Alignment;
+
+ /// TheStores - The actual stores that make up this range.
+ SmallVector<StoreInst*, 16> TheStores;
+
+ bool isProfitableToUseMemset(const TargetData &TD) const;
+
+};
+} // end anon namespace
+
+bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
+ // If we found more than 8 stores to merge or 64 bytes, use memset.
+ if (TheStores.size() >= 8 || End-Start >= 64) return true;
+
+ // Assume that the code generator is capable of merging pairs of stores
+ // together if it wants to.
+ if (TheStores.size() <= 2) return false;
+
+ // If we have fewer than 8 stores, it can still be worthwhile to do this.
+ // For example, merging 4 i8 stores into an i32 store is useful almost always.
+ // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
+ // memset will be split into 2 32-bit stores anyway) and doing so can
+ // pessimize the llvm optimizer.
+ //
+ // Since we don't have perfect knowledge here, make some assumptions: assume
+ // the maximum GPR width is the same size as the pointer size and assume that
+ // this width can be stored. If so, check to see whether we will end up
+ // actually reducing the number of stores used.
+ unsigned Bytes = unsigned(End-Start);
+ unsigned NumPointerStores = Bytes/TD.getPointerSize();
+
+ // Assume the remaining bytes if any are done a byte at a time.
+ unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
+
+ // If we will reduce the # stores (according to this heuristic), do the
+ // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
+ // etc.
+ return TheStores.size() > NumPointerStores+NumByteStores;
+}
+
+
+namespace {
+class MemsetRanges {
+ /// Ranges - A sorted list of the memset ranges. We use std::list here
+ /// because each element is relatively large and expensive to copy.
+ std::list<MemsetRange> Ranges;
+ typedef std::list<MemsetRange>::iterator range_iterator;
+ TargetData &TD;
+public:
+ MemsetRanges(TargetData &td) : TD(td) {}
+
+ typedef std::list<MemsetRange>::const_iterator const_iterator;
+ const_iterator begin() const { return Ranges.begin(); }
+ const_iterator end() const { return Ranges.end(); }
+ bool empty() const { return Ranges.empty(); }
+
+ void addStore(int64_t OffsetFromFirst, StoreInst *SI);
+};
+
+} // end anon namespace
+
+
+/// addStore - Add a new store to the MemsetRanges data structure. This adds a
+/// new range for the specified store at the specified offset, merging into
+/// existing ranges as appropriate.
+void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
+ int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
+
+ // Do a linear search of the ranges to see if this can be joined and/or to
+ // find the insertion point in the list. We keep the ranges sorted for
+ // simplicity here. This is a linear search of a linked list, which is ugly,
+ // however the number of ranges is limited, so this won't get crazy slow.
+ range_iterator I = Ranges.begin(), E = Ranges.end();
+
+ while (I != E && Start > I->End)
+ ++I;
+
+ // We now know that I == E, in which case we didn't find anything to merge
+ // with, or that Start <= I->End. If End < I->Start or I == E, then we need
+ // to insert a new range. Handle this now.
+ if (I == E || End < I->Start) {
+ MemsetRange &R = *Ranges.insert(I, MemsetRange());
+ R.Start = Start;
+ R.End = End;
+ R.StartPtr = SI->getPointerOperand();
+ R.Alignment = SI->getAlignment();
+ R.TheStores.push_back(SI);
+ return;
+ }
+
+ // This store overlaps with I, add it.
+ I->TheStores.push_back(SI);
+
+ // At this point, we may have an interval that completely contains our store.
+ // If so, just add it to the interval and return.
+ if (I->Start <= Start && I->End >= End)
+ return;
+
+ // Now we know that Start <= I->End and End >= I->Start so the range overlaps
+ // but is not entirely contained within the range.
+
+ // See if the range extends the start of the range. In this case, it couldn't
+ // possibly cause it to join the prior range, because otherwise we would have
+ // stopped on *it*.
+ if (Start < I->Start) {
+ I->Start = Start;
+ I->StartPtr = SI->getPointerOperand();
+ I->Alignment = SI->getAlignment();
+ }
+
+ // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
+ // is in or right at the end of I), and that End >= I->Start. Extend I out to
+ // End.
+ if (End > I->End) {
+ I->End = End;
+ range_iterator NextI = I;
+ while (++NextI != E && End >= NextI->Start) {
+ // Merge the range in.
+ I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
+ if (NextI->End > I->End)
+ I->End = NextI->End;
+ Ranges.erase(NextI);
+ NextI = I;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// MemCpyOpt Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+ class MemCpyOpt : public FunctionPass {
+ bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MemCpyOpt() : FunctionPass(ID) {}
+
+ private:
+ // This transformation requires dominator postdominator info
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<MemoryDependenceAnalysis>();
+ }
+
+ // Helper fuctions
+ bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
+ bool processMemCpy(MemCpyInst *M);
+ bool processMemMove(MemMoveInst *M);
+ bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
+ bool iterateOnFunction(Function &F);
+ };
+
+ char MemCpyOpt::ID = 0;
+}
+
+// createMemCpyOptPass - The public interface to this file...
+FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
+
+INITIALIZE_PASS(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false);
+
+
+
+/// processStore - When GVN is scanning forward over instructions, we look for
+/// some other patterns to fold away. In particular, this looks for stores to
+/// neighboring locations of memory. If it sees enough consequtive ones
+/// (currently 4) it attempts to merge them together into a memcpy/memset.
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
+ if (SI->isVolatile()) return false;
+
+ LLVMContext &Context = SI->getContext();
+
+ // There are two cases that are interesting for this code to handle: memcpy
+ // and memset. Right now we only handle memset.
+
+ // Ensure that the value being stored is something that can be memset'able a
+ // byte at a time like "0" or "-1" or any width, as well as things like
+ // 0xA0A0A0A0 and 0.0.
+ Value *ByteVal = isBytewiseValue(SI->getOperand(0));
+ if (!ByteVal)
+ return false;
+
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ if (!TD) return false;
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ Module *M = SI->getParent()->getParent()->getParent();
+
+ // Okay, so we now have a single store that can be splatable. Scan to find
+ // all subsequent stores of the same value to offset from the same pointer.
+ // Join these together into ranges, so we can decide whether contiguous blocks
+ // are stored.
+ MemsetRanges Ranges(*TD);
+
+ Value *StartPtr = SI->getPointerOperand();
+
+ BasicBlock::iterator BI = SI;
+ for (++BI; !isa<TerminatorInst>(BI); ++BI) {
+ if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
+ // If the call is readnone, ignore it, otherwise bail out. We don't even
+ // allow readonly here because we don't want something like:
+ // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
+ if (AA.getModRefBehavior(CallSite(BI)) ==
+ AliasAnalysis::DoesNotAccessMemory)
+ continue;
+
+ // TODO: If this is a memset, try to join it in.
+
+ break;
+ } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
+ break;
+
+ // If this is a non-store instruction it is fine, ignore it.
+ StoreInst *NextStore = dyn_cast<StoreInst>(BI);
+ if (NextStore == 0) continue;
+
+ // If this is a store, see if we can merge it in.
+ if (NextStore->isVolatile()) break;
+
+ // Check to see if this stored value is of the same byte-splattable value.
+ if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ int64_t Offset;
+ if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
+ break;
+
+ Ranges.addStore(Offset, NextStore);
+ }
+
+ // If we have no ranges, then we just had a single store with nothing that
+ // could be merged in. This is a very common case of course.
+ if (Ranges.empty())
+ return false;
+
+ // If we had at least one store that could be merged in, add the starting
+ // store as well. We try to avoid this unless there is at least something
+ // interesting as a small compile-time optimization.
+ Ranges.addStore(0, SI);
+
+
+ // Now that we have full information about ranges, loop over the ranges and
+ // emit memset's for anything big enough to be worthwhile.
+ bool MadeChange = false;
+ for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
+ I != E; ++I) {
+ const MemsetRange &Range = *I;
+
+ if (Range.TheStores.size() == 1) continue;
+
+ // If it is profitable to lower this range to memset, do so now.
+ if (!Range.isProfitableToUseMemset(*TD))
+ continue;
+
+ // Otherwise, we do want to transform this! Create a new memset. We put
+ // the memset right before the first instruction that isn't part of this
+ // memset block. This ensure that the memset is dominated by any addressing
+ // instruction needed by the start of the block.
+ BasicBlock::iterator InsertPt = BI;
+
+ // Get the starting pointer of the block.
+ StartPtr = Range.StartPtr;
+
+ // Determine alignment
+ unsigned Alignment = Range.Alignment;
+ if (Alignment == 0) {
+ const Type *EltType =
+ cast<PointerType>(StartPtr->getType())->getElementType();
+ Alignment = TD->getABITypeAlignment(EltType);
+ }
+
+ // Cast the start ptr to be i8* as memset requires.
+ const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
+ const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
+ StartPTy->getAddressSpace());
+ if (StartPTy!= i8Ptr)
+ StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
+ InsertPt);
+
+ Value *Ops[] = {
+ StartPtr, ByteVal, // Start, value
+ // size
+ ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
+ // align
+ ConstantInt::get(Type::getInt32Ty(Context), Alignment),
+ // volatile
+ ConstantInt::get(Type::getInt1Ty(Context), 0),
+ };
+ const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
+
+ Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
+
+ Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
+ DEBUG(dbgs() << "Replace stores:\n";
+ for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
+ dbgs() << *Range.TheStores[i];
+ dbgs() << "With: " << *C); C=C;
+
+ // Don't invalidate the iterator
+ BBI = BI;
+
+ // Zap all the stores.
+ for (SmallVector<StoreInst*, 16>::const_iterator
+ SI = Range.TheStores.begin(),
+ SE = Range.TheStores.end(); SI != SE; ++SI)
+ (*SI)->eraseFromParent();
+ ++NumMemSetInfer;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+
+/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
+/// and checks for the possibility of a call slot optimization by having
+/// the call write its result directly into the destination of the memcpy.
+bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
+ // The general transformation to keep in mind is
+ //
+ // call @func(..., src, ...)
+ // memcpy(dest, src, ...)
+ //
+ // ->
+ //
+ // memcpy(dest, src, ...)
+ // call @func(..., dest, ...)
+ //
+ // Since moving the memcpy is technically awkward, we additionally check that
+ // src only holds uninitialized values at the moment of the call, meaning that
+ // the memcpy can be discarded rather than moved.
+
+ // Deliberately get the source and destination with bitcasts stripped away,
+ // because we'll need to do type comparisons based on the underlying type.
+ Value *cpyDest = cpy->getDest();
+ Value *cpySrc = cpy->getSource();
+ CallSite CS(C);
+
+ // We need to be able to reason about the size of the memcpy, so we require
+ // that it be a constant.
+ ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
+ if (!cpyLength)
+ return false;
+
+ // Require that src be an alloca. This simplifies the reasoning considerably.
+ AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
+ if (!srcAlloca)
+ return false;
+
+ // Check that all of src is copied to dest.
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ if (!TD) return false;
+
+ ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
+ if (!srcArraySize)
+ return false;
+
+ uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
+ srcArraySize->getZExtValue();
+
+ if (cpyLength->getZExtValue() < srcSize)
+ return false;
+
+ // Check that accessing the first srcSize bytes of dest will not cause a
+ // trap. Otherwise the transform is invalid since it might cause a trap
+ // to occur earlier than it otherwise would.
+ if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
+ // The destination is an alloca. Check it is larger than srcSize.
+ ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
+ if (!destArraySize)
+ return false;
+
+ uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
+ destArraySize->getZExtValue();
+
+ if (destSize < srcSize)
+ return false;
+ } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
+ // If the destination is an sret parameter then only accesses that are
+ // outside of the returned struct type can trap.
+ if (!A->hasStructRetAttr())
+ return false;
+
+ const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
+ uint64_t destSize = TD->getTypeAllocSize(StructTy);
+
+ if (destSize < srcSize)
+ return false;
+ } else {
+ return false;
+ }
+
+ // Check that src is not accessed except via the call and the memcpy. This
+ // guarantees that it holds only undefined values when passed in (so the final
+ // memcpy can be dropped), that it is not read or written between the call and
+ // the memcpy, and that writing beyond the end of it is undefined.
+ SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
+ srcAlloca->use_end());
+ while (!srcUseList.empty()) {
+ User *UI = srcUseList.pop_back_val();
+
+ if (isa<BitCastInst>(UI)) {
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ srcUseList.push_back(*I);
+ } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
+ if (G->hasAllZeroIndices())
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ srcUseList.push_back(*I);
+ else
+ return false;
+ } else if (UI != C && UI != cpy) {
+ return false;
+ }
+ }
+
+ // Since we're changing the parameter to the callsite, we need to make sure
+ // that what would be the new parameter dominates the callsite.
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
+ if (!DT.dominates(cpyDestInst, C))
+ return false;
+
+ // In addition to knowing that the call does not access src in some
+ // unexpected manner, for example via a global, which we deduce from
+ // the use analysis, we also need to know that it does not sneakily
+ // access dest. We rely on AA to figure this out for us.
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
+ AliasAnalysis::NoModRef)
+ return false;
+
+ // All the checks have passed, so do the transformation.
+ bool changedArgument = false;
+ for (unsigned i = 0; i < CS.arg_size(); ++i)
+ if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
+ if (cpySrc->getType() != cpyDest->getType())
+ cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
+ cpyDest->getName(), C);
+ changedArgument = true;
+ if (CS.getArgument(i)->getType() == cpyDest->getType())
+ CS.setArgument(i, cpyDest);
+ else
+ CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
+ CS.getArgument(i)->getType(), cpyDest->getName(), C));
+ }
+
+ if (!changedArgument)
+ return false;
+
+ // Drop any cached information about the call, because we may have changed
+ // its dependence information by changing its parameter.
+ MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
+ MD.removeInstruction(C);
+
+ // Remove the memcpy
+ MD.removeInstruction(cpy);
+ cpy->eraseFromParent();
+ ++NumMemCpyInstr;
+
+ return true;
+}
+
+/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
+/// B to be a memcpy from X to Z (or potentially a memmove, depending on
+/// circumstances). This allows later passes to remove the first memcpy
+/// altogether.
+bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
+ MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
+
+ // The are two possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundance for DSE.
+ // b) call-memcpy xform for return slot optimization.
+ MemDepResult dep = MD.getDependency(M);
+ if (!dep.isClobber())
+ return false;
+ if (!isa<MemCpyInst>(dep.getInst())) {
+ if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
+ return performCallSlotOptzn(M, C);
+ return false;
+ }
+
+ MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst());
+
+ // We can only transforms memcpy's where the dest of one is the source of the
+ // other
+ if (M->getSource() != MDep->getDest())
+ return false;
+
+ // Second, the length of the memcpy's must be the same, or the preceeding one
+ // must be larger than the following one.
+ ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+ ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength());
+ if (!C1 || !C2)
+ return false;
+
+ uint64_t DepSize = C1->getValue().getZExtValue();
+ uint64_t CpySize = C2->getValue().getZExtValue();
+
+ if (DepSize < CpySize)
+ return false;
+
+ // Finally, we have to make sure that the dest of the second does not
+ // alias the source of the first
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
+ AliasAnalysis::NoAlias)
+ return false;
+ else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
+ AliasAnalysis::NoAlias)
+ return false;
+ else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
+ != AliasAnalysis::NoAlias)
+ return false;
+
+ // If all checks passed, then we can transform these memcpy's
+ const Type *ArgTys[3] = { M->getRawDest()->getType(),
+ MDep->getRawSource()->getType(),
+ M->getLength()->getType() };
+ Function *MemCpyFun = Intrinsic::getDeclaration(
+ M->getParent()->getParent()->getParent(),
+ M->getIntrinsicID(), ArgTys, 3);
+
+ Value *Args[5] = {
+ M->getRawDest(), MDep->getRawSource(), M->getLength(),
+ M->getAlignmentCst(), M->getVolatileCst()
+ };
+
+ CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
+
+
+ // If C and M don't interfere, then this is a valid transformation. If they
+ // did, this would mean that the two sources overlap, which would be bad.
+ if (MD.getDependency(C) == dep) {
+ MD.removeInstruction(M);
+ M->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+ }
+
+ // Otherwise, there was no point in doing this, so we remove the call we
+ // inserted and act like nothing happened.
+ MD.removeInstruction(C);
+ C->eraseFromParent();
+ return false;
+}
+
+/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
+/// are guaranteed not to alias.
+bool MemCpyOpt::processMemMove(MemMoveInst *M) {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ // If the memmove is a constant size, use it for the alias query, this allows
+ // us to optimize things like: memmove(P, P+64, 64);
+ uint64_t MemMoveSize = ~0ULL;
+ if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
+ MemMoveSize = Len->getZExtValue();
+
+ // See if the pointers alias.
+ if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) !=
+ AliasAnalysis::NoAlias)
+ return false;
+
+ DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
+
+ // If not, then we know we can transform this.
+ Module *Mod = M->getParent()->getParent()->getParent();
+ const Type *ArgTys[3] = { M->getRawDest()->getType(),
+ M->getRawSource()->getType(),
+ M->getLength()->getType() };
+ M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
+ ArgTys, 3));
+
+ // MemDep may have over conservative information about this instruction, just
+ // conservatively flush it from the cache.
+ getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
+
+ ++NumMoveToCpy;
+ return true;
+}
+
+
+// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN.
+bool MemCpyOpt::iterateOnFunction(Function &F) {
+ bool MadeChange = false;
+
+ // Walk all instruction in the function.
+ for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE;) {
+ // Avoid invalidating the iterator.
+ Instruction *I = BI++;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ MadeChange |= processStore(SI, BI);
+ else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
+ MadeChange |= processMemCpy(M);
+ else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
+ if (processMemMove(M)) {
+ --BI; // Reprocess the new memcpy.
+ MadeChange = true;
+ }
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
+// function.
+//
+bool MemCpyOpt::runOnFunction(Function &F) {
+ bool MadeChange = false;
+ while (1) {
+ if (!iterateOnFunction(F))
+ break;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
new file mode 100644
index 0000000..b8afcc1
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -0,0 +1,1075 @@
+//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass reassociates commutative expressions in an order that is designed
+// to promote better constant propagation, GCSE, LICM, PRE, etc.
+//
+// For example: 4 + (x + 5) -> x + (4 + 5)
+//
+// In the implementation of this algorithm, constants are assigned rank = 0,
+// function arguments are rank = 1, and other values are assigned ranks
+// corresponding to the reverse post order traversal of current function
+// (starting at 2), which effectively gives values in deep loops higher rank
+// than values not in loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reassociate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DenseMap.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumLinear , "Number of insts linearized");
+STATISTIC(NumChanged, "Number of insts reassociated");
+STATISTIC(NumAnnihil, "Number of expr tree annihilated");
+STATISTIC(NumFactor , "Number of multiplies factored");
+
+namespace {
+ struct ValueEntry {
+ unsigned Rank;
+ Value *Op;
+ ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {}
+ };
+ inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) {
+ return LHS.Rank > RHS.Rank; // Sort so that highest rank goes to start.
+ }
+}
+
+#ifndef NDEBUG
+/// PrintOps - Print out the expression identified in the Ops list.
+///
+static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
+ Module *M = I->getParent()->getParent()->getParent();
+ dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
+ << *Ops[0].Op->getType() << '\t';
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ dbgs() << "[ ";
+ WriteAsOperand(dbgs(), Ops[i].Op, false, M);
+ dbgs() << ", #" << Ops[i].Rank << "] ";
+ }
+}
+#endif
+
+namespace {
+ class Reassociate : public FunctionPass {
+ DenseMap<BasicBlock*, unsigned> RankMap;
+ DenseMap<AssertingVH<>, unsigned> ValueRankMap;
+ bool MadeChange;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ Reassociate() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ private:
+ void BuildRankMap(Function &F);
+ unsigned getRank(Value *V);
+ Value *ReassociateExpression(BinaryOperator *I);
+ void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops,
+ unsigned Idx = 0);
+ Value *OptimizeExpression(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops);
+ Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
+ void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
+ void LinearizeExpr(BinaryOperator *I);
+ Value *RemoveFactorFromExpression(Value *V, Value *Factor);
+ void ReassociateBB(BasicBlock *BB);
+
+ void RemoveDeadBinaryOp(Value *V);
+ };
+}
+
+char Reassociate::ID = 0;
+INITIALIZE_PASS(Reassociate, "reassociate",
+ "Reassociate expressions", false, false);
+
+// Public interface to the Reassociate pass
+FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
+
+void Reassociate::RemoveDeadBinaryOp(Value *V) {
+ Instruction *Op = dyn_cast<Instruction>(V);
+ if (!Op || !isa<BinaryOperator>(Op) || !Op->use_empty())
+ return;
+
+ Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1);
+
+ ValueRankMap.erase(Op);
+ Op->eraseFromParent();
+ RemoveDeadBinaryOp(LHS);
+ RemoveDeadBinaryOp(RHS);
+}
+
+
+static bool isUnmovableInstruction(Instruction *I) {
+ if (I->getOpcode() == Instruction::PHI ||
+ I->getOpcode() == Instruction::Alloca ||
+ I->getOpcode() == Instruction::Load ||
+ I->getOpcode() == Instruction::Invoke ||
+ (I->getOpcode() == Instruction::Call &&
+ !isa<DbgInfoIntrinsic>(I)) ||
+ I->getOpcode() == Instruction::UDiv ||
+ I->getOpcode() == Instruction::SDiv ||
+ I->getOpcode() == Instruction::FDiv ||
+ I->getOpcode() == Instruction::URem ||
+ I->getOpcode() == Instruction::SRem ||
+ I->getOpcode() == Instruction::FRem)
+ return true;
+ return false;
+}
+
+void Reassociate::BuildRankMap(Function &F) {
+ unsigned i = 2;
+
+ // Assign distinct ranks to function arguments
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+ ValueRankMap[&*I] = ++i;
+
+ ReversePostOrderTraversal<Function*> RPOT(&F);
+ for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
+ E = RPOT.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ unsigned BBRank = RankMap[BB] = ++i << 16;
+
+ // Walk the basic block, adding precomputed ranks for any instructions that
+ // we cannot move. This ensures that the ranks for these instructions are
+ // all different in the block.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (isUnmovableInstruction(I))
+ ValueRankMap[&*I] = ++BBRank;
+ }
+}
+
+unsigned Reassociate::getRank(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) {
+ if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
+ return 0; // Otherwise it's a global or constant, rank 0.
+ }
+
+ if (unsigned Rank = ValueRankMap[I])
+ return Rank; // Rank already known?
+
+ // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
+ // we can reassociate expressions for code motion! Since we do not recurse
+ // for PHI nodes, we cannot have infinite recursion here, because there
+ // cannot be loops in the value graph that do not go through PHI nodes.
+ unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
+ for (unsigned i = 0, e = I->getNumOperands();
+ i != e && Rank != MaxRank; ++i)
+ Rank = std::max(Rank, getRank(I->getOperand(i)));
+
+ // If this is a not or neg instruction, do not count it for rank. This
+ // assures us that X and ~X will have the same rank.
+ if (!I->getType()->isIntegerTy() ||
+ (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
+ ++Rank;
+
+ //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = "
+ // << Rank << "\n");
+
+ return ValueRankMap[I] = Rank;
+}
+
+/// isReassociableOp - Return true if V is an instruction of the specified
+/// opcode and if it only has one use.
+static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
+ if ((V->hasOneUse() || V->use_empty()) && isa<Instruction>(V) &&
+ cast<Instruction>(V)->getOpcode() == Opcode)
+ return cast<BinaryOperator>(V);
+ return 0;
+}
+
+/// LowerNegateToMultiply - Replace 0-X with X*-1.
+///
+static Instruction *LowerNegateToMultiply(Instruction *Neg,
+ DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ Constant *Cst = Constant::getAllOnesValue(Neg->getType());
+
+ Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
+ ValueRankMap.erase(Neg);
+ Res->takeName(Neg);
+ Neg->replaceAllUsesWith(Res);
+ Neg->eraseFromParent();
+ return Res;
+}
+
+// Given an expression of the form '(A+B)+(D+C)', turn it into '(((A+B)+C)+D)'.
+// Note that if D is also part of the expression tree that we recurse to
+// linearize it as well. Besides that case, this does not recurse into A,B, or
+// C.
+void Reassociate::LinearizeExpr(BinaryOperator *I) {
+ BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0));
+ BinaryOperator *RHS = cast<BinaryOperator>(I->getOperand(1));
+ assert(isReassociableOp(LHS, I->getOpcode()) &&
+ isReassociableOp(RHS, I->getOpcode()) &&
+ "Not an expression that needs linearization?");
+
+ DEBUG(dbgs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n');
+
+ // Move the RHS instruction to live immediately before I, avoiding breaking
+ // dominator properties.
+ RHS->moveBefore(I);
+
+ // Move operands around to do the linearization.
+ I->setOperand(1, RHS->getOperand(0));
+ RHS->setOperand(0, LHS);
+ I->setOperand(0, RHS);
+
+ ++NumLinear;
+ MadeChange = true;
+ DEBUG(dbgs() << "Linearized: " << *I << '\n');
+
+ // If D is part of this expression tree, tail recurse.
+ if (isReassociableOp(I->getOperand(1), I->getOpcode()))
+ LinearizeExpr(I);
+}
+
+
+/// LinearizeExprTree - Given an associative binary expression tree, traverse
+/// all of the uses putting it into canonical form. This forces a left-linear
+/// form of the expression (((a+b)+c)+d), and collects information about the
+/// rank of the non-tree operands.
+///
+/// NOTE: These intentionally destroys the expression tree operands (turning
+/// them into undef values) to reduce #uses of the values. This means that the
+/// caller MUST use something like RewriteExprTree to put the values back in.
+///
+void Reassociate::LinearizeExprTree(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ unsigned Opcode = I->getOpcode();
+
+ // First step, linearize the expression if it is in ((A+B)+(C+D)) form.
+ BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
+ BinaryOperator *RHSBO = isReassociableOp(RHS, Opcode);
+
+ // If this is a multiply expression tree and it contains internal negations,
+ // transform them into multiplies by -1 so they can be reassociated.
+ if (I->getOpcode() == Instruction::Mul) {
+ if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) {
+ LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap);
+ LHSBO = isReassociableOp(LHS, Opcode);
+ }
+ if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) {
+ RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap);
+ RHSBO = isReassociableOp(RHS, Opcode);
+ }
+ }
+
+ if (!LHSBO) {
+ if (!RHSBO) {
+ // Neither the LHS or RHS as part of the tree, thus this is a leaf. As
+ // such, just remember these operands and their rank.
+ Ops.push_back(ValueEntry(getRank(LHS), LHS));
+ Ops.push_back(ValueEntry(getRank(RHS), RHS));
+
+ // Clear the leaves out.
+ I->setOperand(0, UndefValue::get(I->getType()));
+ I->setOperand(1, UndefValue::get(I->getType()));
+ return;
+ }
+
+ // Turn X+(Y+Z) -> (Y+Z)+X
+ std::swap(LHSBO, RHSBO);
+ std::swap(LHS, RHS);
+ bool Success = !I->swapOperands();
+ assert(Success && "swapOperands failed");
+ Success = false;
+ MadeChange = true;
+ } else if (RHSBO) {
+ // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not
+ // part of the expression tree.
+ LinearizeExpr(I);
+ LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0));
+ RHS = I->getOperand(1);
+ RHSBO = 0;
+ }
+
+ // Okay, now we know that the LHS is a nested expression and that the RHS is
+ // not. Perform reassociation.
+ assert(!isReassociableOp(RHS, Opcode) && "LinearizeExpr failed!");
+
+ // Move LHS right before I to make sure that the tree expression dominates all
+ // values.
+ LHSBO->moveBefore(I);
+
+ // Linearize the expression tree on the LHS.
+ LinearizeExprTree(LHSBO, Ops);
+
+ // Remember the RHS operand and its rank.
+ Ops.push_back(ValueEntry(getRank(RHS), RHS));
+
+ // Clear the RHS leaf out.
+ I->setOperand(1, UndefValue::get(I->getType()));
+}
+
+// RewriteExprTree - Now that the operands for this expression tree are
+// linearized and optimized, emit them in-order. This function is written to be
+// tail recursive.
+void Reassociate::RewriteExprTree(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops,
+ unsigned i) {
+ if (i+2 == Ops.size()) {
+ if (I->getOperand(0) != Ops[i].Op ||
+ I->getOperand(1) != Ops[i+1].Op) {
+ Value *OldLHS = I->getOperand(0);
+ DEBUG(dbgs() << "RA: " << *I << '\n');
+ I->setOperand(0, Ops[i].Op);
+ I->setOperand(1, Ops[i+1].Op);
+ DEBUG(dbgs() << "TO: " << *I << '\n');
+ MadeChange = true;
+ ++NumChanged;
+
+ // If we reassociated a tree to fewer operands (e.g. (1+a+2) -> (a+3)
+ // delete the extra, now dead, nodes.
+ RemoveDeadBinaryOp(OldLHS);
+ }
+ return;
+ }
+ assert(i+2 < Ops.size() && "Ops index out of range!");
+
+ if (I->getOperand(1) != Ops[i].Op) {
+ DEBUG(dbgs() << "RA: " << *I << '\n');
+ I->setOperand(1, Ops[i].Op);
+ DEBUG(dbgs() << "TO: " << *I << '\n');
+ MadeChange = true;
+ ++NumChanged;
+ }
+
+ BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0));
+ assert(LHS->getOpcode() == I->getOpcode() &&
+ "Improper expression tree!");
+
+ // Compactify the tree instructions together with each other to guarantee
+ // that the expression tree is dominated by all of Ops.
+ LHS->moveBefore(I);
+ RewriteExprTree(LHS, Ops, i+1);
+}
+
+
+
+// NegateValue - Insert instructions before the instruction pointed to by BI,
+// that computes the negative version of the value specified. The negative
+// version of the value is returned, and BI is left pointing at the instruction
+// that should be processed next by the reassociation pass.
+//
+static Value *NegateValue(Value *V, Instruction *BI) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getNeg(C);
+
+ // We are trying to expose opportunity for reassociation. One of the things
+ // that we want to do to achieve this is to push a negation as deep into an
+ // expression chain as possible, to expose the add instructions. In practice,
+ // this means that we turn this:
+ // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
+ // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
+ // the constants. We assume that instcombine will clean up the mess later if
+ // we introduce tons of unnecessary negation instructions.
+ //
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
+ // Push the negates through the add.
+ I->setOperand(0, NegateValue(I->getOperand(0), BI));
+ I->setOperand(1, NegateValue(I->getOperand(1), BI));
+
+ // We must move the add instruction here, because the neg instructions do
+ // not dominate the old add instruction in general. By moving it, we are
+ // assured that the neg instructions we just inserted dominate the
+ // instruction we are about to insert after them.
+ //
+ I->moveBefore(BI);
+ I->setName(I->getName()+".neg");
+ return I;
+ }
+
+ // Okay, we need to materialize a negated version of V with an instruction.
+ // Scan the use lists of V to see if we have one already.
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ User *U = *UI;
+ if (!BinaryOperator::isNeg(U)) continue;
+
+ // We found one! Now we have to make sure that the definition dominates
+ // this use. We do this by moving it to the entry block (if it is a
+ // non-instruction value) or right after the definition. These negates will
+ // be zapped by reassociate later, so we don't need much finesse here.
+ BinaryOperator *TheNeg = cast<BinaryOperator>(U);
+
+ // Verify that the negate is in this function, V might be a constant expr.
+ if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
+ continue;
+
+ BasicBlock::iterator InsertPt;
+ if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
+ InsertPt = II->getNormalDest()->begin();
+ } else {
+ InsertPt = InstInput;
+ ++InsertPt;
+ }
+ while (isa<PHINode>(InsertPt)) ++InsertPt;
+ } else {
+ InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
+ }
+ TheNeg->moveBefore(InsertPt);
+ return TheNeg;
+ }
+
+ // Insert a 'neg' instruction that subtracts the value from zero to get the
+ // negation.
+ return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI);
+}
+
+/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
+/// X-Y into (X + -Y).
+static bool ShouldBreakUpSubtract(Instruction *Sub) {
+ // If this is a negation, we can't split it up!
+ if (BinaryOperator::isNeg(Sub))
+ return false;
+
+ // Don't bother to break this up unless either the LHS is an associable add or
+ // subtract or if this is only used by one.
+ if (isReassociableOp(Sub->getOperand(0), Instruction::Add) ||
+ isReassociableOp(Sub->getOperand(0), Instruction::Sub))
+ return true;
+ if (isReassociableOp(Sub->getOperand(1), Instruction::Add) ||
+ isReassociableOp(Sub->getOperand(1), Instruction::Sub))
+ return true;
+ if (Sub->hasOneUse() &&
+ (isReassociableOp(Sub->use_back(), Instruction::Add) ||
+ isReassociableOp(Sub->use_back(), Instruction::Sub)))
+ return true;
+
+ return false;
+}
+
+/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
+/// only used by an add, transform this into (X+(0-Y)) to promote better
+/// reassociation.
+static Instruction *BreakUpSubtract(Instruction *Sub,
+ DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ // Convert a subtract into an add and a neg instruction. This allows sub
+ // instructions to be commuted with other add instructions.
+ //
+ // Calculate the negative value of Operand 1 of the sub instruction,
+ // and set it as the RHS of the add instruction we just made.
+ //
+ Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+ Instruction *New =
+ BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
+ New->takeName(Sub);
+
+ // Everyone now refers to the add instruction.
+ ValueRankMap.erase(Sub);
+ Sub->replaceAllUsesWith(New);
+ Sub->eraseFromParent();
+
+ DEBUG(dbgs() << "Negated: " << *New << '\n');
+ return New;
+}
+
+/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
+/// by one, change this into a multiply by a constant to assist with further
+/// reassociation.
+static Instruction *ConvertShiftToMul(Instruction *Shl,
+ DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ // If an operand of this shift is a reassociable multiply, or if the shift
+ // is used by a reassociable multiply or add, turn into a multiply.
+ if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
+ (Shl->hasOneUse() &&
+ (isReassociableOp(Shl->use_back(), Instruction::Mul) ||
+ isReassociableOp(Shl->use_back(), Instruction::Add)))) {
+ Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
+ MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
+
+ Instruction *Mul =
+ BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
+ ValueRankMap.erase(Shl);
+ Mul->takeName(Shl);
+ Shl->replaceAllUsesWith(Mul);
+ Shl->eraseFromParent();
+ return Mul;
+ }
+ return 0;
+}
+
+// Scan backwards and forwards among values with the same rank as element i to
+// see if X exists. If X does not exist, return i. This is useful when
+// scanning for 'x' when we see '-x' because they both get the same rank.
+static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
+ Value *X) {
+ unsigned XRank = Ops[i].Rank;
+ unsigned e = Ops.size();
+ for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j)
+ if (Ops[j].Op == X)
+ return j;
+ // Scan backwards.
+ for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j)
+ if (Ops[j].Op == X)
+ return j;
+ return i;
+}
+
+/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together
+/// and returning the result. Insert the tree before I.
+static Value *EmitAddTreeOfValues(Instruction *I, SmallVectorImpl<Value*> &Ops){
+ if (Ops.size() == 1) return Ops.back();
+
+ Value *V1 = Ops.back();
+ Ops.pop_back();
+ Value *V2 = EmitAddTreeOfValues(I, Ops);
+ return BinaryOperator::CreateAdd(V2, V1, "tmp", I);
+}
+
+/// RemoveFactorFromExpression - If V is an expression tree that is a
+/// multiplication sequence, and if this sequence contains a multiply by Factor,
+/// remove Factor from the tree and return the new tree.
+Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
+ BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
+ if (!BO) return 0;
+
+ SmallVector<ValueEntry, 8> Factors;
+ LinearizeExprTree(BO, Factors);
+
+ bool FoundFactor = false;
+ bool NeedsNegate = false;
+ for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
+ if (Factors[i].Op == Factor) {
+ FoundFactor = true;
+ Factors.erase(Factors.begin()+i);
+ break;
+ }
+
+ // If this is a negative version of this factor, remove it.
+ if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor))
+ if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
+ if (FC1->getValue() == -FC2->getValue()) {
+ FoundFactor = NeedsNegate = true;
+ Factors.erase(Factors.begin()+i);
+ break;
+ }
+ }
+
+ if (!FoundFactor) {
+ // Make sure to restore the operands to the expression tree.
+ RewriteExprTree(BO, Factors);
+ return 0;
+ }
+
+ BasicBlock::iterator InsertPt = BO; ++InsertPt;
+
+ // If this was just a single multiply, remove the multiply and return the only
+ // remaining operand.
+ if (Factors.size() == 1) {
+ ValueRankMap.erase(BO);
+ BO->eraseFromParent();
+ V = Factors[0].Op;
+ } else {
+ RewriteExprTree(BO, Factors);
+ V = BO;
+ }
+
+ if (NeedsNegate)
+ V = BinaryOperator::CreateNeg(V, "neg", InsertPt);
+
+ return V;
+}
+
+/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively
+/// add its operands as factors, otherwise add V to the list of factors.
+///
+/// Ops is the top-level list of add operands we're trying to factor.
+static void FindSingleUseMultiplyFactors(Value *V,
+ SmallVectorImpl<Value*> &Factors,
+ const SmallVectorImpl<ValueEntry> &Ops,
+ bool IsRoot) {
+ BinaryOperator *BO;
+ if (!(V->hasOneUse() || V->use_empty()) || // More than one use.
+ !(BO = dyn_cast<BinaryOperator>(V)) ||
+ BO->getOpcode() != Instruction::Mul) {
+ Factors.push_back(V);
+ return;
+ }
+
+ // If this value has a single use because it is another input to the add
+ // tree we're reassociating and we dropped its use, it actually has two
+ // uses and we can't factor it.
+ if (!IsRoot) {
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (Ops[i].Op == V) {
+ Factors.push_back(V);
+ return;
+ }
+ }
+
+
+ // Otherwise, add the LHS and RHS to the list of factors.
+ FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false);
+ FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false);
+}
+
+/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor'
+/// instruction. This optimizes based on identities. If it can be reduced to
+/// a single Value, it is returned, otherwise the Ops list is mutated as
+/// necessary.
+static Value *OptimizeAndOrXor(unsigned Opcode,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
+ // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ // First, check for X and ~X in the operand list.
+ assert(i < Ops.size());
+ if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^.
+ Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
+ unsigned FoundX = FindInOperandList(Ops, i, X);
+ if (FoundX != i) {
+ if (Opcode == Instruction::And) // ...&X&~X = 0
+ return Constant::getNullValue(X->getType());
+
+ if (Opcode == Instruction::Or) // ...|X|~X = -1
+ return Constant::getAllOnesValue(X->getType());
+ }
+ }
+
+ // Next, check for duplicate pairs of values, which we assume are next to
+ // each other, due to our sorting criteria.
+ assert(i < Ops.size());
+ if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
+ if (Opcode == Instruction::And || Opcode == Instruction::Or) {
+ // Drop duplicate values for And and Or.
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ ++NumAnnihil;
+ continue;
+ }
+
+ // Drop pairs of values for Xor.
+ assert(Opcode == Instruction::Xor);
+ if (e == 2)
+ return Constant::getNullValue(Ops[0].Op->getType());
+
+ // Y ^ X^X -> Y
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
+ i -= 1; e -= 2;
+ ++NumAnnihil;
+ }
+ }
+ return 0;
+}
+
+/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This
+/// optimizes based on identities. If it can be reduced to a single Value, it
+/// is returned, otherwise the Ops list is mutated as necessary.
+Value *Reassociate::OptimizeAdd(Instruction *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // Scan the operand lists looking for X and -X pairs. If we find any, we
+ // can simplify the expression. X+-X == 0. While we're at it, scan for any
+ // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
+ //
+ // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1".
+ //
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ Value *TheOp = Ops[i].Op;
+ // Check to see if we've seen this operand before. If so, we factor all
+ // instances of the operand together. Due to our sorting criteria, we know
+ // that these need to be next to each other in the vector.
+ if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
+ // Rescan the list, remove all instances of this operand from the expr.
+ unsigned NumFound = 0;
+ do {
+ Ops.erase(Ops.begin()+i);
+ ++NumFound;
+ } while (i != Ops.size() && Ops[i].Op == TheOp);
+
+ DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n');
+ ++NumFactor;
+
+ // Insert a new multiply.
+ Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound);
+ Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I);
+
+ // Now that we have inserted a multiply, optimize it. This allows us to
+ // handle cases that require multiple factoring steps, such as this:
+ // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
+ Mul = ReassociateExpression(cast<BinaryOperator>(Mul));
+
+ // If every add operand was a duplicate, return the multiply.
+ if (Ops.empty())
+ return Mul;
+
+ // Otherwise, we had some input that didn't have the dupe, such as
+ // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
+ // things being added by this operation.
+ Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
+
+ --i;
+ e = Ops.size();
+ continue;
+ }
+
+ // Check for X and -X in the operand list.
+ if (!BinaryOperator::isNeg(TheOp))
+ continue;
+
+ Value *X = BinaryOperator::getNegArgument(TheOp);
+ unsigned FoundX = FindInOperandList(Ops, i, X);
+ if (FoundX == i)
+ continue;
+
+ // Remove X and -X from the operand list.
+ if (Ops.size() == 2)
+ return Constant::getNullValue(X->getType());
+
+ Ops.erase(Ops.begin()+i);
+ if (i < FoundX)
+ --FoundX;
+ else
+ --i; // Need to back up an extra one.
+ Ops.erase(Ops.begin()+FoundX);
+ ++NumAnnihil;
+ --i; // Revisit element.
+ e -= 2; // Removed two elements.
+ }
+
+ // Scan the operand list, checking to see if there are any common factors
+ // between operands. Consider something like A*A+A*B*C+D. We would like to
+ // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
+ // To efficiently find this, we count the number of times a factor occurs
+ // for any ADD operands that are MULs.
+ DenseMap<Value*, unsigned> FactorOccurrences;
+
+ // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
+ // where they are actually the same multiply.
+ unsigned MaxOcc = 0;
+ Value *MaxOccVal = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
+ if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
+ continue;
+
+ // Compute all of the factors of this added value.
+ SmallVector<Value*, 8> Factors;
+ FindSingleUseMultiplyFactors(BOp, Factors, Ops, true);
+ assert(Factors.size() > 1 && "Bad linearize!");
+
+ // Add one to FactorOccurrences for each unique factor in this op.
+ SmallPtrSet<Value*, 8> Duplicates;
+ for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
+ Value *Factor = Factors[i];
+ if (!Duplicates.insert(Factor)) continue;
+
+ unsigned Occ = ++FactorOccurrences[Factor];
+ if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
+
+ // If Factor is a negative constant, add the negated value as a factor
+ // because we can percolate the negate out. Watch for minint, which
+ // cannot be positivified.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
+ if (CI->getValue().isNegative() && !CI->getValue().isMinSignedValue()) {
+ Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
+ assert(!Duplicates.count(Factor) &&
+ "Shouldn't have two constant factors, missed a canonicalize");
+
+ unsigned Occ = ++FactorOccurrences[Factor];
+ if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
+ }
+ }
+ }
+
+ // If any factor occurred more than one time, we can pull it out.
+ if (MaxOcc > 1) {
+ DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n');
+ ++NumFactor;
+
+ // Create a new instruction that uses the MaxOccVal twice. If we don't do
+ // this, we could otherwise run into situations where removing a factor
+ // from an expression will drop a use of maxocc, and this can cause
+ // RemoveFactorFromExpression on successive values to behave differently.
+ Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
+ SmallVector<Value*, 4> NewMulOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ // Only try to remove factors from expressions we're allowed to.
+ BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
+ if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
+ continue;
+
+ if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
+ NewMulOps.push_back(V);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+ }
+
+ // No need for extra uses anymore.
+ delete DummyInst;
+
+ unsigned NumAddedValues = NewMulOps.size();
+ Value *V = EmitAddTreeOfValues(I, NewMulOps);
+
+ // Now that we have inserted the add tree, optimize it. This allows us to
+ // handle cases that require multiple factoring steps, such as this:
+ // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
+ assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
+ (void)NumAddedValues;
+ V = ReassociateExpression(cast<BinaryOperator>(V));
+
+ // Create the multiply.
+ Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
+
+ // Rerun associate on the multiply in case the inner expression turned into
+ // a multiply. We want to make sure that we keep things in canonical form.
+ V2 = ReassociateExpression(cast<BinaryOperator>(V2));
+
+ // If every add operand included the factor (e.g. "A*B + A*C"), then the
+ // entire result expression is just the multiply "A*(B+C)".
+ if (Ops.empty())
+ return V2;
+
+ // Otherwise, we had some input that didn't have the factor, such as
+ // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
+ // things being added by this operation.
+ Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
+ }
+
+ return 0;
+}
+
+Value *Reassociate::OptimizeExpression(BinaryOperator *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ // Now that we have the linearized expression tree, try to optimize it.
+ // Start by folding any constants that we found.
+ bool IterateOptimization = false;
+ if (Ops.size() == 1) return Ops[0].Op;
+
+ unsigned Opcode = I->getOpcode();
+
+ if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op))
+ if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) {
+ Ops.pop_back();
+ Ops.back().Op = ConstantExpr::get(Opcode, V1, V2);
+ return OptimizeExpression(I, Ops);
+ }
+
+ // Check for destructive annihilation due to a constant being used.
+ if (ConstantInt *CstVal = dyn_cast<ConstantInt>(Ops.back().Op))
+ switch (Opcode) {
+ default: break;
+ case Instruction::And:
+ if (CstVal->isZero()) // X & 0 -> 0
+ return CstVal;
+ if (CstVal->isAllOnesValue()) // X & -1 -> X
+ Ops.pop_back();
+ break;
+ case Instruction::Mul:
+ if (CstVal->isZero()) { // X * 0 -> 0
+ ++NumAnnihil;
+ return CstVal;
+ }
+
+ if (cast<ConstantInt>(CstVal)->isOne())
+ Ops.pop_back(); // X * 1 -> X
+ break;
+ case Instruction::Or:
+ if (CstVal->isAllOnesValue()) // X | -1 -> -1
+ return CstVal;
+ // FALLTHROUGH!
+ case Instruction::Add:
+ case Instruction::Xor:
+ if (CstVal->isZero()) // X [|^+] 0 -> X
+ Ops.pop_back();
+ break;
+ }
+ if (Ops.size() == 1) return Ops[0].Op;
+
+ // Handle destructive annihilation due to identities between elements in the
+ // argument list here.
+ switch (Opcode) {
+ default: break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ unsigned NumOps = Ops.size();
+ if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
+ return Result;
+ IterateOptimization |= Ops.size() != NumOps;
+ break;
+ }
+
+ case Instruction::Add: {
+ unsigned NumOps = Ops.size();
+ if (Value *Result = OptimizeAdd(I, Ops))
+ return Result;
+ IterateOptimization |= Ops.size() != NumOps;
+ }
+
+ break;
+ //case Instruction::Mul:
+ }
+
+ if (IterateOptimization)
+ return OptimizeExpression(I, Ops);
+ return 0;
+}
+
+
+/// ReassociateBB - Inspect all of the instructions in this basic block,
+/// reassociating them as we go.
+void Reassociate::ReassociateBB(BasicBlock *BB) {
+ for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {
+ Instruction *BI = BBI++;
+ if (BI->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(BI->getOperand(1)))
+ if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
+ MadeChange = true;
+ BI = NI;
+ }
+
+ // Reject cases where it is pointless to do this.
+ if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() ||
+ BI->getType()->isVectorTy())
+ continue; // Floating point ops are not associative.
+
+ // Do not reassociate boolean (i1) expressions. We want to preserve the
+ // original order of evaluation for short-circuited comparisons that
+ // SimplifyCFG has folded to AND/OR expressions. If the expression
+ // is not further optimized, it is likely to be transformed back to a
+ // short-circuited form for code gen, and the source order may have been
+ // optimized for the most likely conditions.
+ if (BI->getType()->isIntegerTy(1))
+ continue;
+
+ // If this is a subtract instruction which is not already in negate form,
+ // see if we can convert it to X+-Y.
+ if (BI->getOpcode() == Instruction::Sub) {
+ if (ShouldBreakUpSubtract(BI)) {
+ BI = BreakUpSubtract(BI, ValueRankMap);
+ // Reset the BBI iterator in case BreakUpSubtract changed the
+ // instruction it points to.
+ BBI = BI;
+ ++BBI;
+ MadeChange = true;
+ } else if (BinaryOperator::isNeg(BI)) {
+ // Otherwise, this is a negation. See if the operand is a multiply tree
+ // and if this is not an inner node of a multiply tree.
+ if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
+ (!BI->hasOneUse() ||
+ !isReassociableOp(BI->use_back(), Instruction::Mul))) {
+ BI = LowerNegateToMultiply(BI, ValueRankMap);
+ MadeChange = true;
+ }
+ }
+ }
+
+ // If this instruction is a commutative binary operator, process it.
+ if (!BI->isAssociative()) continue;
+ BinaryOperator *I = cast<BinaryOperator>(BI);
+
+ // If this is an interior node of a reassociable tree, ignore it until we
+ // get to the root of the tree, to avoid N^2 analysis.
+ if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
+ continue;
+
+ // If this is an add tree that is used by a sub instruction, ignore it
+ // until we process the subtract.
+ if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
+ cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
+ continue;
+
+ ReassociateExpression(I);
+ }
+}
+
+Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
+
+ // First, walk the expression tree, linearizing the tree, collecting the
+ // operand information.
+ SmallVector<ValueEntry, 8> Ops;
+ LinearizeExprTree(I, Ops);
+
+ DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
+
+ // Now that we have linearized the tree to a list and have gathered all of
+ // the operands and their ranks, sort the operands by their rank. Use a
+ // stable_sort so that values with equal ranks will have their relative
+ // positions maintained (and so the compiler is deterministic). Note that
+ // this sorts so that the highest ranking values end up at the beginning of
+ // the vector.
+ std::stable_sort(Ops.begin(), Ops.end());
+
+ // OptimizeExpression - Now that we have the expression tree in a convenient
+ // sorted form, optimize it globally if possible.
+ if (Value *V = OptimizeExpression(I, Ops)) {
+ // This expression tree simplified to something that isn't a tree,
+ // eliminate it.
+ DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
+ I->replaceAllUsesWith(V);
+ RemoveDeadBinaryOp(I);
+ ++NumAnnihil;
+ return V;
+ }
+
+ // We want to sink immediates as deeply as possible except in the case where
+ // this is a multiply tree used only by an add, and the immediate is a -1.
+ // In this case we reassociate to put the negation on the outside so that we
+ // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
+ if (I->getOpcode() == Instruction::Mul && I->hasOneUse() &&
+ cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Ops.back().Op) &&
+ cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
+ ValueEntry Tmp = Ops.pop_back_val();
+ Ops.insert(Ops.begin(), Tmp);
+ }
+
+ DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
+
+ if (Ops.size() == 1) {
+ // This expression tree simplified to something that isn't a tree,
+ // eliminate it.
+ I->replaceAllUsesWith(Ops[0].Op);
+ RemoveDeadBinaryOp(I);
+ return Ops[0].Op;
+ }
+
+ // Now that we ordered and optimized the expressions, splat them back into
+ // the expression tree, removing any unneeded nodes.
+ RewriteExprTree(I, Ops);
+ return I;
+}
+
+
+bool Reassociate::runOnFunction(Function &F) {
+ // Recalculate the rank map for F
+ BuildRankMap(F);
+
+ MadeChange = false;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ ReassociateBB(FI);
+
+ // We are done with the rank map.
+ RankMap.clear();
+ ValueRankMap.clear();
+ return MadeChange;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
new file mode 100644
index 0000000..506b72a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -0,0 +1,130 @@
+//===- Reg2Mem.cpp - Convert registers to allocas -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file demotes all registers to memory references. It is intented to be
+// the inverse of PromoteMemoryToRegister. By converting to loads, the only
+// values live accross basic blocks are allocas and loads before phi nodes.
+// It is intended that this should make CFG hacking much easier.
+// To make later hacking easier, the entry block is split into two, such that
+// all introduced allocas and nothing else are in the entry block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg2mem"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+#include <list>
+using namespace llvm;
+
+STATISTIC(NumRegsDemoted, "Number of registers demoted");
+STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
+
+namespace {
+ struct RegToMem : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ RegToMem() : FunctionPass(ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addPreservedID(BreakCriticalEdgesID);
+ }
+
+ bool valueEscapes(const Instruction *Inst) const {
+ const BasicBlock *BB = Inst->getParent();
+ for (Value::const_use_iterator UI = Inst->use_begin(),E = Inst->use_end();
+ UI != E; ++UI) {
+ const Instruction *I = cast<Instruction>(*UI);
+ if (I->getParent() != BB || isa<PHINode>(I))
+ return true;
+ }
+ return false;
+ }
+
+ virtual bool runOnFunction(Function &F);
+ };
+}
+
+char RegToMem::ID = 0;
+INITIALIZE_PASS(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false);
+
+
+bool RegToMem::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ // Insert all new allocas into entry block.
+ BasicBlock *BBEntry = &F.getEntryBlock();
+ assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
+ "Entry block to function must not have predecessors!");
+
+ // Find first non-alloca instruction and create insertion point. This is
+ // safe if block is well-formed: it always have terminator, otherwise
+ // we'll get and assertion.
+ BasicBlock::iterator I = BBEntry->begin();
+ while (isa<AllocaInst>(I)) ++I;
+
+ CastInst *AllocaInsertionPoint =
+ new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ Type::getInt32Ty(F.getContext()),
+ "reg2mem alloca point", I);
+
+ // Find the escaped instructions. But don't create stack slots for
+ // allocas in entry block.
+ std::list<Instruction*> WorkList;
+ for (Function::iterator ibb = F.begin(), ibe = F.end();
+ ibb != ibe; ++ibb)
+ for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+ iib != iie; ++iib) {
+ if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
+ valueEscapes(iib)) {
+ WorkList.push_front(&*iib);
+ }
+ }
+
+ // Demote escaped instructions
+ NumRegsDemoted += WorkList.size();
+ for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+ ile = WorkList.end(); ilb != ile; ++ilb)
+ DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
+
+ WorkList.clear();
+
+ // Find all phi's
+ for (Function::iterator ibb = F.begin(), ibe = F.end();
+ ibb != ibe; ++ibb)
+ for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+ iib != iie; ++iib)
+ if (isa<PHINode>(iib))
+ WorkList.push_front(&*iib);
+
+ // Demote phi nodes
+ NumPhisDemoted += WorkList.size();
+ for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+ ile = WorkList.end(); ilb != ile; ++ilb)
+ DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
+
+ return true;
+}
+
+
+// createDemoteRegisterToMemory - Provide an entry point to create this pass.
+//
+char &llvm::DemoteRegisterToMemoryID = RegToMem::ID;
+FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
+ return new RegToMem();
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
new file mode 100644
index 0000000..6115c05
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -0,0 +1,1997 @@
+//===- SCCP.cpp - Sparse Conditional Constant Propagation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements sparse conditional constant propagation and merging:
+//
+// Specifically, this:
+// * Assumes values are constant unless proven otherwise
+// * Assumes BasicBlocks are dead unless proven otherwise
+// * Proves values to be constant, and replaces them with constants
+// * Proves conditional branches to be unconditional
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sccp"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumInstRemoved, "Number of instructions removed");
+STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable");
+
+STATISTIC(IPNumInstRemoved, "Number of instructions removed by IPSCCP");
+STATISTIC(IPNumArgsElimed ,"Number of arguments constant propagated by IPSCCP");
+STATISTIC(IPNumGlobalConst, "Number of globals found to be constant by IPSCCP");
+
+namespace {
+/// LatticeVal class - This class represents the different lattice values that
+/// an LLVM value may occupy. It is a simple class with value semantics.
+///
+class LatticeVal {
+ enum LatticeValueTy {
+ /// undefined - This LLVM Value has no known value yet.
+ undefined,
+
+ /// constant - This LLVM Value has a specific constant value.
+ constant,
+
+ /// forcedconstant - This LLVM Value was thought to be undef until
+ /// ResolvedUndefsIn. This is treated just like 'constant', but if merged
+ /// with another (different) constant, it goes to overdefined, instead of
+ /// asserting.
+ forcedconstant,
+
+ /// overdefined - This instruction is not known to be constant, and we know
+ /// it has a value.
+ overdefined
+ };
+
+ /// Val: This stores the current lattice value along with the Constant* for
+ /// the constant if this is a 'constant' or 'forcedconstant' value.
+ PointerIntPair<Constant *, 2, LatticeValueTy> Val;
+
+ LatticeValueTy getLatticeValue() const {
+ return Val.getInt();
+ }
+
+public:
+ LatticeVal() : Val(0, undefined) {}
+
+ bool isUndefined() const { return getLatticeValue() == undefined; }
+ bool isConstant() const {
+ return getLatticeValue() == constant || getLatticeValue() == forcedconstant;
+ }
+ bool isOverdefined() const { return getLatticeValue() == overdefined; }
+
+ Constant *getConstant() const {
+ assert(isConstant() && "Cannot get the constant of a non-constant!");
+ return Val.getPointer();
+ }
+
+ /// markOverdefined - Return true if this is a change in status.
+ bool markOverdefined() {
+ if (isOverdefined())
+ return false;
+
+ Val.setInt(overdefined);
+ return true;
+ }
+
+ /// markConstant - Return true if this is a change in status.
+ bool markConstant(Constant *V) {
+ if (getLatticeValue() == constant) { // Constant but not forcedconstant.
+ assert(getConstant() == V && "Marking constant with different value");
+ return false;
+ }
+
+ if (isUndefined()) {
+ Val.setInt(constant);
+ assert(V && "Marking constant with NULL");
+ Val.setPointer(V);
+ } else {
+ assert(getLatticeValue() == forcedconstant &&
+ "Cannot move from overdefined to constant!");
+ // Stay at forcedconstant if the constant is the same.
+ if (V == getConstant()) return false;
+
+ // Otherwise, we go to overdefined. Assumptions made based on the
+ // forced value are possibly wrong. Assuming this is another constant
+ // could expose a contradiction.
+ Val.setInt(overdefined);
+ }
+ return true;
+ }
+
+ /// getConstantInt - If this is a constant with a ConstantInt value, return it
+ /// otherwise return null.
+ ConstantInt *getConstantInt() const {
+ if (isConstant())
+ return dyn_cast<ConstantInt>(getConstant());
+ return 0;
+ }
+
+ void markForcedConstant(Constant *V) {
+ assert(isUndefined() && "Can't force a defined value!");
+ Val.setInt(forcedconstant);
+ Val.setPointer(V);
+ }
+};
+} // end anonymous namespace.
+
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+//
+/// SCCPSolver - This class is a general purpose solver for Sparse Conditional
+/// Constant Propagation.
+///
+class SCCPSolver : public InstVisitor<SCCPSolver> {
+ const TargetData *TD;
+ SmallPtrSet<BasicBlock*, 8> BBExecutable;// The BBs that are executable.
+ DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
+
+ /// StructValueState - This maintains ValueState for values that have
+ /// StructType, for example for formal arguments, calls, insertelement, etc.
+ ///
+ DenseMap<std::pair<Value*, unsigned>, LatticeVal> StructValueState;
+
+ /// GlobalValue - If we are tracking any values for the contents of a global
+ /// variable, we keep a mapping from the constant accessor to the element of
+ /// the global, to the currently known value. If the value becomes
+ /// overdefined, it's entry is simply removed from this map.
+ DenseMap<GlobalVariable*, LatticeVal> TrackedGlobals;
+
+ /// TrackedRetVals - If we are tracking arguments into and the return
+ /// value out of a function, it will have an entry in this map, indicating
+ /// what the known return value for the function is.
+ DenseMap<Function*, LatticeVal> TrackedRetVals;
+
+ /// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
+ /// that return multiple values.
+ DenseMap<std::pair<Function*, unsigned>, LatticeVal> TrackedMultipleRetVals;
+
+ /// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
+ /// represented here for efficient lookup.
+ SmallPtrSet<Function*, 16> MRVFunctionsTracked;
+
+ /// TrackingIncomingArguments - This is the set of functions for whose
+ /// arguments we make optimistic assumptions about and try to prove as
+ /// constants.
+ SmallPtrSet<Function*, 16> TrackingIncomingArguments;
+
+ /// The reason for two worklists is that overdefined is the lowest state
+ /// on the lattice, and moving things to overdefined as fast as possible
+ /// makes SCCP converge much faster.
+ ///
+ /// By having a separate worklist, we accomplish this because everything
+ /// possibly overdefined will become overdefined at the soonest possible
+ /// point.
+ SmallVector<Value*, 64> OverdefinedInstWorkList;
+ SmallVector<Value*, 64> InstWorkList;
+
+
+ SmallVector<BasicBlock*, 64> BBWorkList; // The BasicBlock work list
+
+ /// UsersOfOverdefinedPHIs - Keep track of any users of PHI nodes that are not
+ /// overdefined, despite the fact that the PHI node is overdefined.
+ std::multimap<PHINode*, Instruction*> UsersOfOverdefinedPHIs;
+
+ /// KnownFeasibleEdges - Entries in this set are edges which have already had
+ /// PHI nodes retriggered.
+ typedef std::pair<BasicBlock*, BasicBlock*> Edge;
+ DenseSet<Edge> KnownFeasibleEdges;
+public:
+ SCCPSolver(const TargetData *td) : TD(td) {}
+
+ /// MarkBlockExecutable - This method can be used by clients to mark all of
+ /// the blocks that are known to be intrinsically live in the processed unit.
+ ///
+ /// This returns true if the block was not considered live before.
+ bool MarkBlockExecutable(BasicBlock *BB) {
+ if (!BBExecutable.insert(BB)) return false;
+ DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n");
+ BBWorkList.push_back(BB); // Add the block to the work list!
+ return true;
+ }
+
+ /// TrackValueOfGlobalVariable - Clients can use this method to
+ /// inform the SCCPSolver that it should track loads and stores to the
+ /// specified global variable if it can. This is only legal to call if
+ /// performing Interprocedural SCCP.
+ void TrackValueOfGlobalVariable(GlobalVariable *GV) {
+ // We only track the contents of scalar globals.
+ if (GV->getType()->getElementType()->isSingleValueType()) {
+ LatticeVal &IV = TrackedGlobals[GV];
+ if (!isa<UndefValue>(GV->getInitializer()))
+ IV.markConstant(GV->getInitializer());
+ }
+ }
+
+ /// AddTrackedFunction - If the SCCP solver is supposed to track calls into
+ /// and out of the specified function (which cannot have its address taken),
+ /// this method must be called.
+ void AddTrackedFunction(Function *F) {
+ // Add an entry, F -> undef.
+ if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ MRVFunctionsTracked.insert(F);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ TrackedMultipleRetVals.insert(std::make_pair(std::make_pair(F, i),
+ LatticeVal()));
+ } else
+ TrackedRetVals.insert(std::make_pair(F, LatticeVal()));
+ }
+
+ void AddArgumentTrackedFunction(Function *F) {
+ TrackingIncomingArguments.insert(F);
+ }
+
+ /// Solve - Solve for constants and executable blocks.
+ ///
+ void Solve();
+
+ /// ResolvedUndefsIn - While solving the dataflow for a function, we assume
+ /// that branches on undef values cannot reach any of their successors.
+ /// However, this is not a safe assumption. After we solve dataflow, this
+ /// method should be use to handle this. If this returns true, the solver
+ /// should be rerun.
+ bool ResolvedUndefsIn(Function &F);
+
+ bool isBlockExecutable(BasicBlock *BB) const {
+ return BBExecutable.count(BB);
+ }
+
+ LatticeVal getLatticeValueFor(Value *V) const {
+ DenseMap<Value*, LatticeVal>::const_iterator I = ValueState.find(V);
+ assert(I != ValueState.end() && "V is not in valuemap!");
+ return I->second;
+ }
+
+ /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
+ DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
+ StructValueState.find(std::make_pair(V, i));
+ assert(I != StructValueState.end() && "V is not in valuemap!");
+ return I->second;
+ }*/
+
+ /// getTrackedRetVals - Get the inferred return value map.
+ ///
+ const DenseMap<Function*, LatticeVal> &getTrackedRetVals() {
+ return TrackedRetVals;
+ }
+
+ /// getTrackedGlobals - Get and return the set of inferred initializers for
+ /// global variables.
+ const DenseMap<GlobalVariable*, LatticeVal> &getTrackedGlobals() {
+ return TrackedGlobals;
+ }
+
+ void markOverdefined(Value *V) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ markOverdefined(ValueState[V], V);
+ }
+
+ /// markAnythingOverdefined - Mark the specified value overdefined. This
+ /// works with both scalars and structs.
+ void markAnythingOverdefined(Value *V) {
+ if (const StructType *STy = dyn_cast<StructType>(V->getType()))
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ markOverdefined(getStructValueState(V, i), V);
+ else
+ markOverdefined(V);
+ }
+
+private:
+ // markConstant - Make a value be marked as "constant". If the value
+ // is not already a constant, add it to the instruction work list so that
+ // the users of the instruction are updated later.
+ //
+ void markConstant(LatticeVal &IV, Value *V, Constant *C) {
+ if (!IV.markConstant(C)) return;
+ DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n');
+ if (IV.isOverdefined())
+ OverdefinedInstWorkList.push_back(V);
+ else
+ InstWorkList.push_back(V);
+ }
+
+ void markConstant(Value *V, Constant *C) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ markConstant(ValueState[V], V, C);
+ }
+
+ void markForcedConstant(Value *V, Constant *C) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ LatticeVal &IV = ValueState[V];
+ IV.markForcedConstant(C);
+ DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n');
+ if (IV.isOverdefined())
+ OverdefinedInstWorkList.push_back(V);
+ else
+ InstWorkList.push_back(V);
+ }
+
+
+ // markOverdefined - Make a value be marked as "overdefined". If the
+ // value is not already overdefined, add it to the overdefined instruction
+ // work list so that the users of the instruction are updated later.
+ void markOverdefined(LatticeVal &IV, Value *V) {
+ if (!IV.markOverdefined()) return;
+
+ DEBUG(dbgs() << "markOverdefined: ";
+ if (Function *F = dyn_cast<Function>(V))
+ dbgs() << "Function '" << F->getName() << "'\n";
+ else
+ dbgs() << *V << '\n');
+ // Only instructions go on the work list
+ OverdefinedInstWorkList.push_back(V);
+ }
+
+ void mergeInValue(LatticeVal &IV, Value *V, LatticeVal MergeWithV) {
+ if (IV.isOverdefined() || MergeWithV.isUndefined())
+ return; // Noop.
+ if (MergeWithV.isOverdefined())
+ markOverdefined(IV, V);
+ else if (IV.isUndefined())
+ markConstant(IV, V, MergeWithV.getConstant());
+ else if (IV.getConstant() != MergeWithV.getConstant())
+ markOverdefined(IV, V);
+ }
+
+ void mergeInValue(Value *V, LatticeVal MergeWithV) {
+ assert(!V->getType()->isStructTy() && "Should use other method");
+ mergeInValue(ValueState[V], V, MergeWithV);
+ }
+
+
+ /// getValueState - Return the LatticeVal object that corresponds to the
+ /// value. This function handles the case when the value hasn't been seen yet
+ /// by properly seeding constants etc.
+ LatticeVal &getValueState(Value *V) {
+ assert(!V->getType()->isStructTy() && "Should use getStructValueState");
+
+ std::pair<DenseMap<Value*, LatticeVal>::iterator, bool> I =
+ ValueState.insert(std::make_pair(V, LatticeVal()));
+ LatticeVal &LV = I.first->second;
+
+ if (!I.second)
+ return LV; // Common case, already in the map.
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Undef values remain undefined.
+ if (!isa<UndefValue>(V))
+ LV.markConstant(C); // Constants are constant
+ }
+
+ // All others are underdefined by default.
+ return LV;
+ }
+
+ /// getStructValueState - Return the LatticeVal object that corresponds to the
+ /// value/field pair. This function handles the case when the value hasn't
+ /// been seen yet by properly seeding constants etc.
+ LatticeVal &getStructValueState(Value *V, unsigned i) {
+ assert(V->getType()->isStructTy() && "Should use getValueState");
+ assert(i < cast<StructType>(V->getType())->getNumElements() &&
+ "Invalid element #");
+
+ std::pair<DenseMap<std::pair<Value*, unsigned>, LatticeVal>::iterator,
+ bool> I = StructValueState.insert(
+ std::make_pair(std::make_pair(V, i), LatticeVal()));
+ LatticeVal &LV = I.first->second;
+
+ if (!I.second)
+ return LV; // Common case, already in the map.
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<UndefValue>(C))
+ ; // Undef values remain undefined.
+ else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
+ LV.markConstant(CS->getOperand(i)); // Constants are constant.
+ else if (isa<ConstantAggregateZero>(C)) {
+ const Type *FieldTy = cast<StructType>(V->getType())->getElementType(i);
+ LV.markConstant(Constant::getNullValue(FieldTy));
+ } else
+ LV.markOverdefined(); // Unknown sort of constant.
+ }
+
+ // All others are underdefined by default.
+ return LV;
+ }
+
+
+ /// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
+ /// work list if it is not already executable.
+ void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+ if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+ return; // This edge is already known to be executable!
+
+ if (!MarkBlockExecutable(Dest)) {
+ // If the destination is already executable, we just made an *edge*
+ // feasible that wasn't before. Revisit the PHI nodes in the block
+ // because they have potentially new operands.
+ DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << "\n");
+
+ PHINode *PN;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I)
+ visitPHINode(*PN);
+ }
+ }
+
+ // getFeasibleSuccessors - Return a vector of booleans to indicate which
+ // successors are reachable from a given terminator instruction.
+ //
+ void getFeasibleSuccessors(TerminatorInst &TI, SmallVector<bool, 16> &Succs);
+
+ // isEdgeFeasible - Return true if the control flow edge from the 'From' basic
+ // block to the 'To' basic block is currently feasible.
+ //
+ bool isEdgeFeasible(BasicBlock *From, BasicBlock *To);
+
+ // OperandChangedState - This method is invoked on all of the users of an
+ // instruction that was just changed state somehow. Based on this
+ // information, we need to update the specified user of this instruction.
+ //
+ void OperandChangedState(Instruction *I) {
+ if (BBExecutable.count(I->getParent())) // Inst is executable?
+ visit(*I);
+ }
+
+ /// RemoveFromOverdefinedPHIs - If I has any entries in the
+ /// UsersOfOverdefinedPHIs map for PN, remove them now.
+ void RemoveFromOverdefinedPHIs(Instruction *I, PHINode *PN) {
+ if (UsersOfOverdefinedPHIs.empty()) return;
+ std::multimap<PHINode*, Instruction*>::iterator It, E;
+ tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN);
+ while (It != E) {
+ if (It->second == I)
+ UsersOfOverdefinedPHIs.erase(It++);
+ else
+ ++It;
+ }
+ }
+
+private:
+ friend class InstVisitor<SCCPSolver>;
+
+ // visit implementations - Something changed in this instruction. Either an
+ // operand made a transition, or the instruction is newly executable. Change
+ // the value type of I to reflect these changes if appropriate.
+ void visitPHINode(PHINode &I);
+
+ // Terminators
+ void visitReturnInst(ReturnInst &I);
+ void visitTerminatorInst(TerminatorInst &TI);
+
+ void visitCastInst(CastInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitBinaryOperator(Instruction &I);
+ void visitCmpInst(CmpInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &I);
+ void visitExtractValueInst(ExtractValueInst &EVI);
+ void visitInsertValueInst(InsertValueInst &IVI);
+
+ // Instructions that cannot be folded away.
+ void visitStoreInst (StoreInst &I);
+ void visitLoadInst (LoadInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitCallInst (CallInst &I) {
+ visitCallSite(&I);
+ }
+ void visitInvokeInst (InvokeInst &II) {
+ visitCallSite(&II);
+ visitTerminatorInst(II);
+ }
+ void visitCallSite (CallSite CS);
+ void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
+ void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
+ void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
+ void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
+
+ void visitInstruction(Instruction &I) {
+ // If a new instruction is added to LLVM that we don't handle.
+ dbgs() << "SCCP: Don't know how to handle: " << I;
+ markAnythingOverdefined(&I); // Just in case
+ }
+};
+
+} // end anonymous namespace
+
+
+// getFeasibleSuccessors - Return a vector of booleans to indicate which
+// successors are reachable from a given terminator instruction.
+//
+void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
+ SmallVector<bool, 16> &Succs) {
+ Succs.resize(TI.getNumSuccessors());
+ if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
+ if (BI->isUnconditional()) {
+ Succs[0] = true;
+ return;
+ }
+
+ LatticeVal BCValue = getValueState(BI->getCondition());
+ ConstantInt *CI = BCValue.getConstantInt();
+ if (CI == 0) {
+ // Overdefined condition variables, and branches on unfoldable constant
+ // conditions, mean the branch could go either way.
+ if (!BCValue.isUndefined())
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // Constant condition variables mean the branch can only go a single way.
+ Succs[CI->isZero()] = true;
+ return;
+ }
+
+ if (isa<InvokeInst>(TI)) {
+ // Invoke instructions successors are always executable.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) {
+ LatticeVal SCValue = getValueState(SI->getCondition());
+ ConstantInt *CI = SCValue.getConstantInt();
+
+ if (CI == 0) { // Overdefined or undefined condition?
+ // All destinations are executable!
+ if (!SCValue.isUndefined())
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ Succs[SI->findCaseValue(CI)] = true;
+ return;
+ }
+
+ // TODO: This could be improved if the operand is a [cast of a] BlockAddress.
+ if (isa<IndirectBrInst>(&TI)) {
+ // Just mark all destinations executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+#ifndef NDEBUG
+ dbgs() << "Unknown terminator instruction: " << TI << '\n';
+#endif
+ llvm_unreachable("SCCP: Don't know how to handle this terminator!");
+}
+
+
+// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
+// block to the 'To' basic block is currently feasible.
+//
+bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
+ assert(BBExecutable.count(To) && "Dest should always be alive!");
+
+ // Make sure the source basic block is executable!!
+ if (!BBExecutable.count(From)) return false;
+
+ // Check to make sure this edge itself is actually feasible now.
+ TerminatorInst *TI = From->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional())
+ return true;
+
+ LatticeVal BCValue = getValueState(BI->getCondition());
+
+ // Overdefined condition variables mean the branch could go either way,
+ // undef conditions mean that neither edge is feasible yet.
+ ConstantInt *CI = BCValue.getConstantInt();
+ if (CI == 0)
+ return !BCValue.isUndefined();
+
+ // Constant condition variables mean the branch can only go a single way.
+ return BI->getSuccessor(CI->isZero()) == To;
+ }
+
+ // Invoke instructions successors are always executable.
+ if (isa<InvokeInst>(TI))
+ return true;
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ LatticeVal SCValue = getValueState(SI->getCondition());
+ ConstantInt *CI = SCValue.getConstantInt();
+
+ if (CI == 0)
+ return !SCValue.isUndefined();
+
+ // Make sure to skip the "default value" which isn't a value
+ for (unsigned i = 1, E = SI->getNumSuccessors(); i != E; ++i)
+ if (SI->getSuccessorValue(i) == CI) // Found the taken branch.
+ return SI->getSuccessor(i) == To;
+
+ // If the constant value is not equal to any of the branches, we must
+ // execute default branch.
+ return SI->getDefaultDest() == To;
+ }
+
+ // Just mark all destinations executable!
+ // TODO: This could be improved if the operand is a [cast of a] BlockAddress.
+ if (isa<IndirectBrInst>(&TI))
+ return true;
+
+#ifndef NDEBUG
+ dbgs() << "Unknown terminator instruction: " << *TI << '\n';
+#endif
+ llvm_unreachable(0);
+}
+
+// visit Implementations - Something changed in this instruction, either an
+// operand made a transition, or the instruction is newly executable. Change
+// the value type of I to reflect these changes if appropriate. This method
+// makes sure to do the following actions:
+//
+// 1. If a phi node merges two constants in, and has conflicting value coming
+// from different branches, or if the PHI node merges in an overdefined
+// value, then the PHI node becomes overdefined.
+// 2. If a phi node merges only constants in, and they all agree on value, the
+// PHI node becomes a constant value equal to that.
+// 3. If V <- x (op) y && isConstant(x) && isConstant(y) V = Constant
+// 4. If V <- x (op) y && (isOverdefined(x) || isOverdefined(y)) V = Overdefined
+// 5. If V <- MEM or V <- CALL or V <- (unknown) then V = Overdefined
+// 6. If a conditional branch has a value that is constant, make the selected
+// destination executable
+// 7. If a conditional branch has a value that is overdefined, make all
+// successors executable.
+//
+void SCCPSolver::visitPHINode(PHINode &PN) {
+ // If this PN returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this if code actually uses this.
+ if (PN.getType()->isStructTy())
+ return markAnythingOverdefined(&PN);
+
+ if (getValueState(&PN).isOverdefined()) {
+ // There may be instructions using this PHI node that are not overdefined
+ // themselves. If so, make sure that they know that the PHI node operand
+ // changed.
+ std::multimap<PHINode*, Instruction*>::iterator I, E;
+ tie(I, E) = UsersOfOverdefinedPHIs.equal_range(&PN);
+ if (I == E)
+ return;
+
+ SmallVector<Instruction*, 16> Users;
+ for (; I != E; ++I)
+ Users.push_back(I->second);
+ while (!Users.empty())
+ visit(Users.pop_back_val());
+ return; // Quick exit
+ }
+
+ // Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
+ // and slow us down a lot. Just mark them overdefined.
+ if (PN.getNumIncomingValues() > 64)
+ return markOverdefined(&PN);
+
+ // Look at all of the executable operands of the PHI node. If any of them
+ // are overdefined, the PHI becomes overdefined as well. If they are all
+ // constant, and they agree with each other, the PHI becomes the identical
+ // constant. If they are constant and don't agree, the PHI is overdefined.
+ // If there are no executable operands, the PHI remains undefined.
+ //
+ Constant *OperandVal = 0;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ LatticeVal IV = getValueState(PN.getIncomingValue(i));
+ if (IV.isUndefined()) continue; // Doesn't influence PHI node.
+
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
+ continue;
+
+ if (IV.isOverdefined()) // PHI node becomes overdefined!
+ return markOverdefined(&PN);
+
+ if (OperandVal == 0) { // Grab the first value.
+ OperandVal = IV.getConstant();
+ continue;
+ }
+
+ // There is already a reachable operand. If we conflict with it,
+ // then the PHI node becomes overdefined. If we agree with it, we
+ // can continue on.
+
+ // Check to see if there are two different constants merging, if so, the PHI
+ // node is overdefined.
+ if (IV.getConstant() != OperandVal)
+ return markOverdefined(&PN);
+ }
+
+ // If we exited the loop, this means that the PHI node only has constant
+ // arguments that agree with each other(and OperandVal is the constant) or
+ // OperandVal is null because there are no defined incoming arguments. If
+ // this is the case, the PHI remains undefined.
+ //
+ if (OperandVal)
+ markConstant(&PN, OperandVal); // Acquire operand value
+}
+
+
+
+
+void SCCPSolver::visitReturnInst(ReturnInst &I) {
+ if (I.getNumOperands() == 0) return; // ret void
+
+ Function *F = I.getParent()->getParent();
+ Value *ResultOp = I.getOperand(0);
+
+ // If we are tracking the return value of this function, merge it in.
+ if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) {
+ DenseMap<Function*, LatticeVal>::iterator TFRVI =
+ TrackedRetVals.find(F);
+ if (TFRVI != TrackedRetVals.end()) {
+ mergeInValue(TFRVI->second, F, getValueState(ResultOp));
+ return;
+ }
+ }
+
+ // Handle functions that return multiple values.
+ if (!TrackedMultipleRetVals.empty()) {
+ if (const StructType *STy = dyn_cast<StructType>(ResultOp->getType()))
+ if (MRVFunctionsTracked.count(F))
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F,
+ getStructValueState(ResultOp, i));
+
+ }
+}
+
+void SCCPSolver::visitTerminatorInst(TerminatorInst &TI) {
+ SmallVector<bool, 16> SuccFeasible;
+ getFeasibleSuccessors(TI, SuccFeasible);
+
+ BasicBlock *BB = TI.getParent();
+
+ // Mark all feasible successors executable.
+ for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
+ if (SuccFeasible[i])
+ markEdgeExecutable(BB, TI.getSuccessor(i));
+}
+
+void SCCPSolver::visitCastInst(CastInst &I) {
+ LatticeVal OpSt = getValueState(I.getOperand(0));
+ if (OpSt.isOverdefined()) // Inherit overdefinedness of operand
+ markOverdefined(&I);
+ else if (OpSt.isConstant()) // Propagate constant value
+ markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
+ OpSt.getConstant(), I.getType()));
+}
+
+
+void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
+ // If this returns a struct, mark all elements over defined, we don't track
+ // structs in structs.
+ if (EVI.getType()->isStructTy())
+ return markAnythingOverdefined(&EVI);
+
+ // If this is extracting from more than one level of struct, we don't know.
+ if (EVI.getNumIndices() != 1)
+ return markOverdefined(&EVI);
+
+ Value *AggVal = EVI.getAggregateOperand();
+ if (AggVal->getType()->isStructTy()) {
+ unsigned i = *EVI.idx_begin();
+ LatticeVal EltVal = getStructValueState(AggVal, i);
+ mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ } else {
+ // Otherwise, must be extracting from an array.
+ return markOverdefined(&EVI);
+ }
+}
+
+void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
+ const StructType *STy = dyn_cast<StructType>(IVI.getType());
+ if (STy == 0)
+ return markOverdefined(&IVI);
+
+ // If this has more than one index, we can't handle it, drive all results to
+ // undef.
+ if (IVI.getNumIndices() != 1)
+ return markAnythingOverdefined(&IVI);
+
+ Value *Aggr = IVI.getAggregateOperand();
+ unsigned Idx = *IVI.idx_begin();
+
+ // Compute the result based on what we're inserting.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ // This passes through all values that aren't the inserted element.
+ if (i != Idx) {
+ LatticeVal EltVal = getStructValueState(Aggr, i);
+ mergeInValue(getStructValueState(&IVI, i), &IVI, EltVal);
+ continue;
+ }
+
+ Value *Val = IVI.getInsertedValueOperand();
+ if (Val->getType()->isStructTy())
+ // We don't track structs in structs.
+ markOverdefined(getStructValueState(&IVI, i), &IVI);
+ else {
+ LatticeVal InVal = getValueState(Val);
+ mergeInValue(getStructValueState(&IVI, i), &IVI, InVal);
+ }
+ }
+}
+
+void SCCPSolver::visitSelectInst(SelectInst &I) {
+ // If this select returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this if code actually uses this.
+ if (I.getType()->isStructTy())
+ return markAnythingOverdefined(&I);
+
+ LatticeVal CondValue = getValueState(I.getCondition());
+ if (CondValue.isUndefined())
+ return;
+
+ if (ConstantInt *CondCB = CondValue.getConstantInt()) {
+ Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue();
+ mergeInValue(&I, getValueState(OpVal));
+ return;
+ }
+
+ // Otherwise, the condition is overdefined or a constant we can't evaluate.
+ // See if we can produce something better than overdefined based on the T/F
+ // value.
+ LatticeVal TVal = getValueState(I.getTrueValue());
+ LatticeVal FVal = getValueState(I.getFalseValue());
+
+ // select ?, C, C -> C.
+ if (TVal.isConstant() && FVal.isConstant() &&
+ TVal.getConstant() == FVal.getConstant())
+ return markConstant(&I, FVal.getConstant());
+
+ if (TVal.isUndefined()) // select ?, undef, X -> X.
+ return mergeInValue(&I, FVal);
+ if (FVal.isUndefined()) // select ?, X, undef -> X.
+ return mergeInValue(&I, TVal);
+ markOverdefined(&I);
+}
+
+// Handle Binary Operators.
+void SCCPSolver::visitBinaryOperator(Instruction &I) {
+ LatticeVal V1State = getValueState(I.getOperand(0));
+ LatticeVal V2State = getValueState(I.getOperand(1));
+
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ if (V1State.isConstant() && V2State.isConstant())
+ return markConstant(IV, &I,
+ ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
+ V2State.getConstant()));
+
+ // If something is undef, wait for it to resolve.
+ if (!V1State.isOverdefined() && !V2State.isOverdefined())
+ return;
+
+ // Otherwise, one of our operands is overdefined. Try to produce something
+ // better than overdefined with some tricks.
+
+ // If this is an AND or OR with 0 or -1, it doesn't matter that the other
+ // operand is overdefined.
+ if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) {
+ LatticeVal *NonOverdefVal = 0;
+ if (!V1State.isOverdefined())
+ NonOverdefVal = &V1State;
+ else if (!V2State.isOverdefined())
+ NonOverdefVal = &V2State;
+
+ if (NonOverdefVal) {
+ if (NonOverdefVal->isUndefined()) {
+ // Could annihilate value.
+ if (I.getOpcode() == Instruction::And)
+ markConstant(IV, &I, Constant::getNullValue(I.getType()));
+ else if (const VectorType *PT = dyn_cast<VectorType>(I.getType()))
+ markConstant(IV, &I, Constant::getAllOnesValue(PT));
+ else
+ markConstant(IV, &I,
+ Constant::getAllOnesValue(I.getType()));
+ return;
+ }
+
+ if (I.getOpcode() == Instruction::And) {
+ // X and 0 = 0
+ if (NonOverdefVal->getConstant()->isNullValue())
+ return markConstant(IV, &I, NonOverdefVal->getConstant());
+ } else {
+ if (ConstantInt *CI = NonOverdefVal->getConstantInt())
+ if (CI->isAllOnesValue()) // X or -1 = -1
+ return markConstant(IV, &I, NonOverdefVal->getConstant());
+ }
+ }
+ }
+
+
+ // If both operands are PHI nodes, it is possible that this instruction has
+ // a constant value, despite the fact that the PHI node doesn't. Check for
+ // this condition now.
+ if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
+ if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
+ if (PN1->getParent() == PN2->getParent()) {
+ // Since the two PHI nodes are in the same basic block, they must have
+ // entries for the same predecessors. Walk the predecessor list, and
+ // if all of the incoming values are constants, and the result of
+ // evaluating this expression with all incoming value pairs is the
+ // same, then this expression is a constant even though the PHI node
+ // is not a constant!
+ LatticeVal Result;
+ for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
+ LatticeVal In1 = getValueState(PN1->getIncomingValue(i));
+ BasicBlock *InBlock = PN1->getIncomingBlock(i);
+ LatticeVal In2 =getValueState(PN2->getIncomingValueForBlock(InBlock));
+
+ if (In1.isOverdefined() || In2.isOverdefined()) {
+ Result.markOverdefined();
+ break; // Cannot fold this operation over the PHI nodes!
+ }
+
+ if (In1.isConstant() && In2.isConstant()) {
+ Constant *V = ConstantExpr::get(I.getOpcode(), In1.getConstant(),
+ In2.getConstant());
+ if (Result.isUndefined())
+ Result.markConstant(V);
+ else if (Result.isConstant() && Result.getConstant() != V) {
+ Result.markOverdefined();
+ break;
+ }
+ }
+ }
+
+ // If we found a constant value here, then we know the instruction is
+ // constant despite the fact that the PHI nodes are overdefined.
+ if (Result.isConstant()) {
+ markConstant(IV, &I, Result.getConstant());
+ // Remember that this instruction is virtually using the PHI node
+ // operands.
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
+ return;
+ }
+
+ if (Result.isUndefined())
+ return;
+
+ // Okay, this really is overdefined now. Since we might have
+ // speculatively thought that this was not overdefined before, and
+ // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
+ // make sure to clean out any entries that we put there, for
+ // efficiency.
+ RemoveFromOverdefinedPHIs(&I, PN1);
+ RemoveFromOverdefinedPHIs(&I, PN2);
+ }
+
+ markOverdefined(&I);
+}
+
+// Handle ICmpInst instruction.
+void SCCPSolver::visitCmpInst(CmpInst &I) {
+ LatticeVal V1State = getValueState(I.getOperand(0));
+ LatticeVal V2State = getValueState(I.getOperand(1));
+
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ if (V1State.isConstant() && V2State.isConstant())
+ return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
+ V1State.getConstant(),
+ V2State.getConstant()));
+
+ // If operands are still undefined, wait for it to resolve.
+ if (!V1State.isOverdefined() && !V2State.isOverdefined())
+ return;
+
+ // If something is overdefined, use some tricks to avoid ending up and over
+ // defined if we can.
+
+ // If both operands are PHI nodes, it is possible that this instruction has
+ // a constant value, despite the fact that the PHI node doesn't. Check for
+ // this condition now.
+ if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
+ if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
+ if (PN1->getParent() == PN2->getParent()) {
+ // Since the two PHI nodes are in the same basic block, they must have
+ // entries for the same predecessors. Walk the predecessor list, and
+ // if all of the incoming values are constants, and the result of
+ // evaluating this expression with all incoming value pairs is the
+ // same, then this expression is a constant even though the PHI node
+ // is not a constant!
+ LatticeVal Result;
+ for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
+ LatticeVal In1 = getValueState(PN1->getIncomingValue(i));
+ BasicBlock *InBlock = PN1->getIncomingBlock(i);
+ LatticeVal In2 =getValueState(PN2->getIncomingValueForBlock(InBlock));
+
+ if (In1.isOverdefined() || In2.isOverdefined()) {
+ Result.markOverdefined();
+ break; // Cannot fold this operation over the PHI nodes!
+ }
+
+ if (In1.isConstant() && In2.isConstant()) {
+ Constant *V = ConstantExpr::getCompare(I.getPredicate(),
+ In1.getConstant(),
+ In2.getConstant());
+ if (Result.isUndefined())
+ Result.markConstant(V);
+ else if (Result.isConstant() && Result.getConstant() != V) {
+ Result.markOverdefined();
+ break;
+ }
+ }
+ }
+
+ // If we found a constant value here, then we know the instruction is
+ // constant despite the fact that the PHI nodes are overdefined.
+ if (Result.isConstant()) {
+ markConstant(&I, Result.getConstant());
+ // Remember that this instruction is virtually using the PHI node
+ // operands.
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
+ return;
+ }
+
+ if (Result.isUndefined())
+ return;
+
+ // Okay, this really is overdefined now. Since we might have
+ // speculatively thought that this was not overdefined before, and
+ // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
+ // make sure to clean out any entries that we put there, for
+ // efficiency.
+ RemoveFromOverdefinedPHIs(&I, PN1);
+ RemoveFromOverdefinedPHIs(&I, PN2);
+ }
+
+ markOverdefined(&I);
+}
+
+void SCCPSolver::visitExtractElementInst(ExtractElementInst &I) {
+ // TODO : SCCP does not handle vectors properly.
+ return markOverdefined(&I);
+
+#if 0
+ LatticeVal &ValState = getValueState(I.getOperand(0));
+ LatticeVal &IdxState = getValueState(I.getOperand(1));
+
+ if (ValState.isOverdefined() || IdxState.isOverdefined())
+ markOverdefined(&I);
+ else if(ValState.isConstant() && IdxState.isConstant())
+ markConstant(&I, ConstantExpr::getExtractElement(ValState.getConstant(),
+ IdxState.getConstant()));
+#endif
+}
+
+void SCCPSolver::visitInsertElementInst(InsertElementInst &I) {
+ // TODO : SCCP does not handle vectors properly.
+ return markOverdefined(&I);
+#if 0
+ LatticeVal &ValState = getValueState(I.getOperand(0));
+ LatticeVal &EltState = getValueState(I.getOperand(1));
+ LatticeVal &IdxState = getValueState(I.getOperand(2));
+
+ if (ValState.isOverdefined() || EltState.isOverdefined() ||
+ IdxState.isOverdefined())
+ markOverdefined(&I);
+ else if(ValState.isConstant() && EltState.isConstant() &&
+ IdxState.isConstant())
+ markConstant(&I, ConstantExpr::getInsertElement(ValState.getConstant(),
+ EltState.getConstant(),
+ IdxState.getConstant()));
+ else if (ValState.isUndefined() && EltState.isConstant() &&
+ IdxState.isConstant())
+ markConstant(&I,ConstantExpr::getInsertElement(UndefValue::get(I.getType()),
+ EltState.getConstant(),
+ IdxState.getConstant()));
+#endif
+}
+
+void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) {
+ // TODO : SCCP does not handle vectors properly.
+ return markOverdefined(&I);
+#if 0
+ LatticeVal &V1State = getValueState(I.getOperand(0));
+ LatticeVal &V2State = getValueState(I.getOperand(1));
+ LatticeVal &MaskState = getValueState(I.getOperand(2));
+
+ if (MaskState.isUndefined() ||
+ (V1State.isUndefined() && V2State.isUndefined()))
+ return; // Undefined output if mask or both inputs undefined.
+
+ if (V1State.isOverdefined() || V2State.isOverdefined() ||
+ MaskState.isOverdefined()) {
+ markOverdefined(&I);
+ } else {
+ // A mix of constant/undef inputs.
+ Constant *V1 = V1State.isConstant() ?
+ V1State.getConstant() : UndefValue::get(I.getType());
+ Constant *V2 = V2State.isConstant() ?
+ V2State.getConstant() : UndefValue::get(I.getType());
+ Constant *Mask = MaskState.isConstant() ?
+ MaskState.getConstant() : UndefValue::get(I.getOperand(2)->getType());
+ markConstant(&I, ConstantExpr::getShuffleVector(V1, V2, Mask));
+ }
+#endif
+}
+
+// Handle getelementptr instructions. If all operands are constants then we
+// can turn this into a getelementptr ConstantExpr.
+//
+void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
+ if (ValueState[&I].isOverdefined()) return;
+
+ SmallVector<Constant*, 8> Operands;
+ Operands.reserve(I.getNumOperands());
+
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ LatticeVal State = getValueState(I.getOperand(i));
+ if (State.isUndefined())
+ return; // Operands are not resolved yet.
+
+ if (State.isOverdefined())
+ return markOverdefined(&I);
+
+ assert(State.isConstant() && "Unknown state!");
+ Operands.push_back(State.getConstant());
+ }
+
+ Constant *Ptr = Operands[0];
+ markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0]+1,
+ Operands.size()-1));
+}
+
+void SCCPSolver::visitStoreInst(StoreInst &SI) {
+ // If this store is of a struct, ignore it.
+ if (SI.getOperand(0)->getType()->isStructTy())
+ return;
+
+ if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1)))
+ return;
+
+ GlobalVariable *GV = cast<GlobalVariable>(SI.getOperand(1));
+ DenseMap<GlobalVariable*, LatticeVal>::iterator I = TrackedGlobals.find(GV);
+ if (I == TrackedGlobals.end() || I->second.isOverdefined()) return;
+
+ // Get the value we are storing into the global, then merge it.
+ mergeInValue(I->second, GV, getValueState(SI.getOperand(0)));
+ if (I->second.isOverdefined())
+ TrackedGlobals.erase(I); // No need to keep tracking this!
+}
+
+
+// Handle load instructions. If the operand is a constant pointer to a constant
+// global, we can replace the load with the loaded constant value!
+void SCCPSolver::visitLoadInst(LoadInst &I) {
+ // If this load is of a struct, just mark the result overdefined.
+ if (I.getType()->isStructTy())
+ return markAnythingOverdefined(&I);
+
+ LatticeVal PtrVal = getValueState(I.getOperand(0));
+ if (PtrVal.isUndefined()) return; // The pointer is not resolved yet!
+
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ if (!PtrVal.isConstant() || I.isVolatile())
+ return markOverdefined(IV, &I);
+
+ Constant *Ptr = PtrVal.getConstant();
+
+ // load null -> null
+ if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
+ return markConstant(IV, &I, Constant::getNullValue(I.getType()));
+
+ // Transform load (constant global) into the value loaded.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ if (!TrackedGlobals.empty()) {
+ // If we are tracking this global, merge in the known value for it.
+ DenseMap<GlobalVariable*, LatticeVal>::iterator It =
+ TrackedGlobals.find(GV);
+ if (It != TrackedGlobals.end()) {
+ mergeInValue(IV, &I, It->second);
+ return;
+ }
+ }
+ }
+
+ // Transform load from a constant into a constant if possible.
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, TD))
+ return markConstant(IV, &I, C);
+
+ // Otherwise we cannot say for certain what value this load will produce.
+ // Bail out.
+ markOverdefined(IV, &I);
+}
+
+void SCCPSolver::visitCallSite(CallSite CS) {
+ Function *F = CS.getCalledFunction();
+ Instruction *I = CS.getInstruction();
+
+ // The common case is that we aren't tracking the callee, either because we
+ // are not doing interprocedural analysis or the callee is indirect, or is
+ // external. Handle these cases first.
+ if (F == 0 || F->isDeclaration()) {
+CallOverdefined:
+ // Void return and not tracking callee, just bail.
+ if (I->getType()->isVoidTy()) return;
+
+ // Otherwise, if we have a single return value case, and if the function is
+ // a declaration, maybe we can constant fold it.
+ if (F && F->isDeclaration() && !I->getType()->isStructTy() &&
+ canConstantFoldCallTo(F)) {
+
+ SmallVector<Constant*, 8> Operands;
+ for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
+ AI != E; ++AI) {
+ LatticeVal State = getValueState(*AI);
+
+ if (State.isUndefined())
+ return; // Operands are not resolved yet.
+ if (State.isOverdefined())
+ return markOverdefined(I);
+ assert(State.isConstant() && "Unknown state!");
+ Operands.push_back(State.getConstant());
+ }
+
+ // If we can constant fold this, mark the result of the call as a
+ // constant.
+ if (Constant *C = ConstantFoldCall(F, Operands.data(), Operands.size()))
+ return markConstant(I, C);
+ }
+
+ // Otherwise, we don't know anything about this call, mark it overdefined.
+ return markAnythingOverdefined(I);
+ }
+
+ // If this is a local function that doesn't have its address taken, mark its
+ // entry block executable and merge in the actual arguments to the call into
+ // the formal arguments of the function.
+ if (!TrackingIncomingArguments.empty() && TrackingIncomingArguments.count(F)){
+ MarkBlockExecutable(F->begin());
+
+ // Propagate information from this call site into the callee.
+ CallSite::arg_iterator CAI = CS.arg_begin();
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI, ++CAI) {
+ // If this argument is byval, and if the function is not readonly, there
+ // will be an implicit copy formed of the input aggregate.
+ if (AI->hasByValAttr() && !F->onlyReadsMemory()) {
+ markOverdefined(AI);
+ continue;
+ }
+
+ if (const StructType *STy = dyn_cast<StructType>(AI->getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ LatticeVal CallArg = getStructValueState(*CAI, i);
+ mergeInValue(getStructValueState(AI, i), AI, CallArg);
+ }
+ } else {
+ mergeInValue(AI, getValueState(*CAI));
+ }
+ }
+ }
+
+ // If this is a single/zero retval case, see if we're tracking the function.
+ if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ if (!MRVFunctionsTracked.count(F))
+ goto CallOverdefined; // Not tracking this callee.
+
+ // If we are tracking this callee, propagate the result of the function
+ // into this call site.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(getStructValueState(I, i), I,
+ TrackedMultipleRetVals[std::make_pair(F, i)]);
+ } else {
+ DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
+ if (TFRVI == TrackedRetVals.end())
+ goto CallOverdefined; // Not tracking this callee.
+
+ // If so, propagate the return value of the callee into this call result.
+ mergeInValue(I, TFRVI->second);
+ }
+}
+
+void SCCPSolver::Solve() {
+ // Process the work lists until they are empty!
+ while (!BBWorkList.empty() || !InstWorkList.empty() ||
+ !OverdefinedInstWorkList.empty()) {
+ // Process the overdefined instruction's work list first, which drives other
+ // things to overdefined more quickly.
+ while (!OverdefinedInstWorkList.empty()) {
+ Value *I = OverdefinedInstWorkList.pop_back_val();
+
+ DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n');
+
+ // "I" got into the work list because it either made the transition from
+ // bottom to constant
+ //
+ // Anything on this worklist that is overdefined need not be visited
+ // since all of its users will have already been marked as overdefined
+ // Update all of the users of this instruction's value.
+ //
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ if (Instruction *I = dyn_cast<Instruction>(*UI))
+ OperandChangedState(I);
+ }
+
+ // Process the instruction work list.
+ while (!InstWorkList.empty()) {
+ Value *I = InstWorkList.pop_back_val();
+
+ DEBUG(dbgs() << "\nPopped off I-WL: " << *I << '\n');
+
+ // "I" got into the work list because it made the transition from undef to
+ // constant.
+ //
+ // Anything on this worklist that is overdefined need not be visited
+ // since all of its users will have already been marked as overdefined.
+ // Update all of the users of this instruction's value.
+ //
+ if (I->getType()->isStructTy() || !getValueState(I).isOverdefined())
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ if (Instruction *I = dyn_cast<Instruction>(*UI))
+ OperandChangedState(I);
+ }
+
+ // Process the basic block work list.
+ while (!BBWorkList.empty()) {
+ BasicBlock *BB = BBWorkList.back();
+ BBWorkList.pop_back();
+
+ DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n');
+
+ // Notify all instructions in this basic block that they are newly
+ // executable.
+ visit(BB);
+ }
+ }
+}
+
+/// ResolvedUndefsIn - While solving the dataflow for a function, we assume
+/// that branches on undef values cannot reach any of their successors.
+/// However, this is not a safe assumption. After we solve dataflow, this
+/// method should be use to handle this. If this returns true, the solver
+/// should be rerun.
+///
+/// This method handles this by finding an unresolved branch and marking it one
+/// of the edges from the block as being feasible, even though the condition
+/// doesn't say it would otherwise be. This allows SCCP to find the rest of the
+/// CFG and only slightly pessimizes the analysis results (by marking one,
+/// potentially infeasible, edge feasible). This cannot usefully modify the
+/// constraints on the condition of the branch, as that would impact other users
+/// of the value.
+///
+/// This scan also checks for values that use undefs, whose results are actually
+/// defined. For example, 'zext i8 undef to i32' should produce all zeros
+/// conservatively, as "(zext i8 X -> i32) & 0xFF00" must always return zero,
+/// even if X isn't defined.
+bool SCCPSolver::ResolvedUndefsIn(Function &F) {
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BBExecutable.count(BB))
+ continue;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Look for instructions which produce undef values.
+ if (I->getType()->isVoidTy()) continue;
+
+ if (const StructType *STy = dyn_cast<StructType>(I->getType())) {
+ // Only a few things that can be structs matter for undef. Just send
+ // all their results to overdefined. We could be more precise than this
+ // but it isn't worth bothering.
+ if (isa<CallInst>(I) || isa<SelectInst>(I)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ LatticeVal &LV = getStructValueState(I, i);
+ if (LV.isUndefined())
+ markOverdefined(LV, I);
+ }
+ }
+ continue;
+ }
+
+ LatticeVal &LV = getValueState(I);
+ if (!LV.isUndefined()) continue;
+
+ // No instructions using structs need disambiguation.
+ if (I->getOperand(0)->getType()->isStructTy())
+ continue;
+
+ // Get the lattice values of the first two operands for use below.
+ LatticeVal Op0LV = getValueState(I->getOperand(0));
+ LatticeVal Op1LV;
+ if (I->getNumOperands() == 2) {
+ // No instructions using structs need disambiguation.
+ if (I->getOperand(1)->getType()->isStructTy())
+ continue;
+
+ // If this is a two-operand instruction, and if both operands are
+ // undefs, the result stays undef.
+ Op1LV = getValueState(I->getOperand(1));
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ continue;
+ }
+
+ // If this is an instructions whose result is defined even if the input is
+ // not fully defined, propagate the information.
+ const Type *ITy = I->getType();
+ switch (I->getOpcode()) {
+ default: break; // Leave the instruction as an undef.
+ case Instruction::ZExt:
+ // After a zero extend, we know the top part is zero. SExt doesn't have
+ // to be handled here, because we don't know whether the top part is 1's
+ // or 0's.
+ case Instruction::SIToFP: // some FP values are not possible, just use 0.
+ case Instruction::UIToFP: // some FP values are not possible, just use 0.
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+ case Instruction::Mul:
+ case Instruction::And:
+ // undef * X -> 0. X could be zero.
+ // undef & X -> 0. X could be zero.
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+
+ case Instruction::Or:
+ // undef | X -> -1. X could be -1.
+ markForcedConstant(I, Constant::getAllOnesValue(ITy));
+ return true;
+
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ // X / undef -> undef. No change.
+ // X % undef -> undef. No change.
+ if (Op1LV.isUndefined()) break;
+
+ // undef / X -> 0. X could be maxint.
+ // undef % X -> 0. X could be 1.
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+
+ case Instruction::AShr:
+ // undef >>s X -> undef. No change.
+ if (Op0LV.isUndefined()) break;
+
+ // X >>s undef -> X. X could be 0, X could have the high-bit known set.
+ if (Op0LV.isConstant())
+ markForcedConstant(I, Op0LV.getConstant());
+ else
+ markOverdefined(I);
+ return true;
+ case Instruction::LShr:
+ case Instruction::Shl:
+ // undef >> X -> undef. No change.
+ // undef << X -> undef. No change.
+ if (Op0LV.isUndefined()) break;
+
+ // X >> undef -> 0. X could be 0.
+ // X << undef -> 0. X could be 0.
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+ case Instruction::Select:
+ // undef ? X : Y -> X or Y. There could be commonality between X/Y.
+ if (Op0LV.isUndefined()) {
+ if (!Op1LV.isConstant()) // Pick the constant one if there is any.
+ Op1LV = getValueState(I->getOperand(2));
+ } else if (Op1LV.isUndefined()) {
+ // c ? undef : undef -> undef. No change.
+ Op1LV = getValueState(I->getOperand(2));
+ if (Op1LV.isUndefined())
+ break;
+ // Otherwise, c ? undef : x -> x.
+ } else {
+ // Leave Op1LV as Operand(1)'s LatticeValue.
+ }
+
+ if (Op1LV.isConstant())
+ markForcedConstant(I, Op1LV.getConstant());
+ else
+ markOverdefined(I);
+ return true;
+ case Instruction::Call:
+ // If a call has an undef result, it is because it is constant foldable
+ // but one of the inputs was undef. Just force the result to
+ // overdefined.
+ markOverdefined(I);
+ return true;
+ }
+ }
+
+ // Check to see if we have a branch or switch on an undefined value. If so
+ // we force the branch to go one way or the other to make the successor
+ // values live. It doesn't really matter which way we force it.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (!BI->isConditional()) continue;
+ if (!getValueState(BI->getCondition()).isUndefined())
+ continue;
+
+ // If the input to SCCP is actually branch on undef, fix the undef to
+ // false.
+ if (isa<UndefValue>(BI->getCondition())) {
+ BI->setCondition(ConstantInt::getFalse(BI->getContext()));
+ markEdgeExecutable(BB, TI->getSuccessor(1));
+ return true;
+ }
+
+ // Otherwise, it is a branch on a symbolic value which is currently
+ // considered to be undef. Handle this by forcing the input value to the
+ // branch to false.
+ markForcedConstant(BI->getCondition(),
+ ConstantInt::getFalse(TI->getContext()));
+ return true;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (SI->getNumSuccessors() < 2) // no cases
+ continue;
+ if (!getValueState(SI->getCondition()).isUndefined())
+ continue;
+
+ // If the input to SCCP is actually switch on undef, fix the undef to
+ // the first constant.
+ if (isa<UndefValue>(SI->getCondition())) {
+ SI->setCondition(SI->getCaseValue(1));
+ markEdgeExecutable(BB, TI->getSuccessor(1));
+ return true;
+ }
+
+ markForcedConstant(SI->getCondition(), SI->getCaseValue(1));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ //
+ /// SCCP Class - This class uses the SCCPSolver to implement a per-function
+ /// Sparse Conditional Constant Propagator.
+ ///
+ struct SCCP : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ SCCP() : FunctionPass(ID) {}
+
+ // runOnFunction - Run the Sparse Conditional Constant Propagation
+ // algorithm, and return true if the function was modified.
+ //
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+} // end anonymous namespace
+
+char SCCP::ID = 0;
+INITIALIZE_PASS(SCCP, "sccp",
+ "Sparse Conditional Constant Propagation", false, false);
+
+// createSCCPPass - This is the public interface to this file.
+FunctionPass *llvm::createSCCPPass() {
+ return new SCCP();
+}
+
+static void DeleteInstructionInBlock(BasicBlock *BB) {
+ DEBUG(dbgs() << " BasicBlock Dead:" << *BB);
+ ++NumDeadBlocks;
+
+ // Delete the instructions backwards, as it has a reduced likelihood of
+ // having to update as many def-use and use-def chains.
+ while (!isa<TerminatorInst>(BB->begin())) {
+ Instruction *I = --BasicBlock::iterator(BB->getTerminator());
+
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ BB->getInstList().erase(I);
+ ++NumInstRemoved;
+ }
+}
+
+// runOnFunction() - Run the Sparse Conditional Constant Propagation algorithm,
+// and return true if the function was modified.
+//
+bool SCCP::runOnFunction(Function &F) {
+ DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
+ SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+
+ // Mark the first block of the function as being executable.
+ Solver.MarkBlockExecutable(F.begin());
+
+ // Mark all arguments to the function as being overdefined.
+ for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;++AI)
+ Solver.markAnythingOverdefined(AI);
+
+ // Solve for constants.
+ bool ResolvedUndefs = true;
+ while (ResolvedUndefs) {
+ Solver.Solve();
+ DEBUG(dbgs() << "RESOLVING UNDEFs\n");
+ ResolvedUndefs = Solver.ResolvedUndefsIn(F);
+ }
+
+ bool MadeChanges = false;
+
+ // If we decided that there are basic blocks that are dead in this function,
+ // delete their contents now. Note that we cannot actually delete the blocks,
+ // as we cannot modify the CFG of the function.
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!Solver.isBlockExecutable(BB)) {
+ DeleteInstructionInBlock(BB);
+ MadeChanges = true;
+ continue;
+ }
+
+ // Iterate over all of the instructions in a function, replacing them with
+ // constants if we have found them to be of constant values.
+ //
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+ if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
+ continue;
+
+ // TODO: Reconstruct structs from their elements.
+ if (Inst->getType()->isStructTy())
+ continue;
+
+ LatticeVal IV = Solver.getLatticeValueFor(Inst);
+ if (IV.isOverdefined())
+ continue;
+
+ Constant *Const = IV.isConstant()
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
+
+ // Replaces all of the uses of a variable with uses of the constant.
+ Inst->replaceAllUsesWith(Const);
+
+ // Delete the instruction.
+ Inst->eraseFromParent();
+
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++NumInstRemoved;
+ }
+ }
+
+ return MadeChanges;
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ //
+ /// IPSCCP Class - This class implements interprocedural Sparse Conditional
+ /// Constant Propagation.
+ ///
+ struct IPSCCP : public ModulePass {
+ static char ID;
+ IPSCCP() : ModulePass(ID) {}
+ bool runOnModule(Module &M);
+ };
+} // end anonymous namespace
+
+char IPSCCP::ID = 0;
+INITIALIZE_PASS(IPSCCP, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false);
+
+// createIPSCCPPass - This is the public interface to this file.
+ModulePass *llvm::createIPSCCPPass() {
+ return new IPSCCP();
+}
+
+
+static bool AddressIsTaken(const GlobalValue *GV) {
+ // Delete any dead constantexpr klingons.
+ GV->removeDeadConstantUsers();
+
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == GV || SI->isVolatile())
+ return true; // Storing addr of GV.
+ } else if (isa<InvokeInst>(U) || isa<CallInst>(U)) {
+ // Make sure we are calling the function, not passing the address.
+ ImmutableCallSite CS(cast<Instruction>(U));
+ if (!CS.isCallee(UI))
+ return true;
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile())
+ return true;
+ } else if (isa<BlockAddress>(U)) {
+ // blockaddress doesn't take the address of the function, it takes addr
+ // of label.
+ } else {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool IPSCCP::runOnModule(Module &M) {
+ SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+
+ // AddressTakenFunctions - This set keeps track of the address-taken functions
+ // that are in the input. As IPSCCP runs through and simplifies code,
+ // functions that were address taken can end up losing their
+ // address-taken-ness. Because of this, we keep track of their addresses from
+ // the first pass so we can use them for the later simplification pass.
+ SmallPtrSet<Function*, 32> AddressTakenFunctions;
+
+ // Loop over all functions, marking arguments to those with their addresses
+ // taken or that are external as overdefined.
+ //
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration())
+ continue;
+
+ // If this is a strong or ODR definition of this function, then we can
+ // propagate information about its result into callsites of it.
+ if (!F->mayBeOverridden())
+ Solver.AddTrackedFunction(F);
+
+ // If this function only has direct calls that we can see, we can track its
+ // arguments and return value aggressively, and can assume it is not called
+ // unless we see evidence to the contrary.
+ if (F->hasLocalLinkage()) {
+ if (AddressIsTaken(F))
+ AddressTakenFunctions.insert(F);
+ else {
+ Solver.AddArgumentTrackedFunction(F);
+ continue;
+ }
+ }
+
+ // Assume the function is called.
+ Solver.MarkBlockExecutable(F->begin());
+
+ // Assume nothing about the incoming arguments.
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI)
+ Solver.markAnythingOverdefined(AI);
+ }
+
+ // Loop over global variables. We inform the solver about any internal global
+ // variables that do not have their 'addresses taken'. If they don't have
+ // their addresses taken, we can propagate constants through them.
+ for (Module::global_iterator G = M.global_begin(), E = M.global_end();
+ G != E; ++G)
+ if (!G->isConstant() && G->hasLocalLinkage() && !AddressIsTaken(G))
+ Solver.TrackValueOfGlobalVariable(G);
+
+ // Solve for constants.
+ bool ResolvedUndefs = true;
+ while (ResolvedUndefs) {
+ Solver.Solve();
+
+ DEBUG(dbgs() << "RESOLVING UNDEFS\n");
+ ResolvedUndefs = false;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ ResolvedUndefs |= Solver.ResolvedUndefsIn(*F);
+ }
+
+ bool MadeChanges = false;
+
+ // Iterate over all of the instructions in the module, replacing them with
+ // constants if we have found them to be of constant values.
+ //
+ SmallVector<BasicBlock*, 512> BlocksToErase;
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (Solver.isBlockExecutable(F->begin())) {
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ if (AI->use_empty() || AI->getType()->isStructTy()) continue;
+
+ // TODO: Could use getStructLatticeValueFor to find out if the entire
+ // result is a constant and replace it entirely if so.
+
+ LatticeVal IV = Solver.getLatticeValueFor(AI);
+ if (IV.isOverdefined()) continue;
+
+ Constant *CST = IV.isConstant() ?
+ IV.getConstant() : UndefValue::get(AI->getType());
+ DEBUG(dbgs() << "*** Arg " << *AI << " = " << *CST <<"\n");
+
+ // Replaces all of the uses of a variable with uses of the
+ // constant.
+ AI->replaceAllUsesWith(CST);
+ ++IPNumArgsElimed;
+ }
+ }
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (!Solver.isBlockExecutable(BB)) {
+ DeleteInstructionInBlock(BB);
+ MadeChanges = true;
+
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = TI->getSuccessor(i);
+ if (!Succ->empty() && isa<PHINode>(Succ->begin()))
+ TI->getSuccessor(i)->removePredecessor(BB);
+ }
+ if (!TI->use_empty())
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ TI->eraseFromParent();
+
+ if (&*BB != &F->front())
+ BlocksToErase.push_back(BB);
+ else
+ new UnreachableInst(M.getContext(), BB);
+ continue;
+ }
+
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+ if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy())
+ continue;
+
+ // TODO: Could use getStructLatticeValueFor to find out if the entire
+ // result is a constant and replace it entirely if so.
+
+ LatticeVal IV = Solver.getLatticeValueFor(Inst);
+ if (IV.isOverdefined())
+ continue;
+
+ Constant *Const = IV.isConstant()
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
+
+ // Replaces all of the uses of a variable with uses of the
+ // constant.
+ Inst->replaceAllUsesWith(Const);
+
+ // Delete the instruction.
+ if (!isa<CallInst>(Inst) && !isa<TerminatorInst>(Inst))
+ Inst->eraseFromParent();
+
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++IPNumInstRemoved;
+ }
+ }
+
+ // Now that all instructions in the function are constant folded, erase dead
+ // blocks, because we can now use ConstantFoldTerminator to get rid of
+ // in-edges.
+ for (unsigned i = 0, e = BlocksToErase.size(); i != e; ++i) {
+ // If there are any PHI nodes in this successor, drop entries for BB now.
+ BasicBlock *DeadBB = BlocksToErase[i];
+ for (Value::use_iterator UI = DeadBB->use_begin(), UE = DeadBB->use_end();
+ UI != UE; ) {
+ // Grab the user and then increment the iterator early, as the user
+ // will be deleted. Step past all adjacent uses from the same user.
+ Instruction *I = dyn_cast<Instruction>(*UI);
+ do { ++UI; } while (UI != UE && *UI == I);
+
+ // Ignore blockaddress users; BasicBlock's dtor will handle them.
+ if (!I) continue;
+
+ bool Folded = ConstantFoldTerminator(I->getParent());
+ if (!Folded) {
+ // The constant folder may not have been able to fold the terminator
+ // if this is a branch or switch on undef. Fold it manually as a
+ // branch to the first successor.
+#ifndef NDEBUG
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ assert(BI->isConditional() && isa<UndefValue>(BI->getCondition()) &&
+ "Branch should be foldable!");
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");
+ } else {
+ llvm_unreachable("Didn't fold away reference to block!");
+ }
+#endif
+
+ // Make this an uncond branch to the first successor.
+ TerminatorInst *TI = I->getParent()->getTerminator();
+ BranchInst::Create(TI->getSuccessor(0), TI);
+
+ // Remove entries in successor phi nodes to remove edges.
+ for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
+ TI->getSuccessor(i)->removePredecessor(TI->getParent());
+
+ // Remove the old terminator.
+ TI->eraseFromParent();
+ }
+ }
+
+ // Finally, delete the basic block.
+ F->getBasicBlockList().erase(DeadBB);
+ }
+ BlocksToErase.clear();
+ }
+
+ // If we inferred constant or undef return values for a function, we replaced
+ // all call uses with the inferred value. This means we don't need to bother
+ // actually returning anything from the function. Replace all return
+ // instructions with return undef.
+ //
+ // Do this in two stages: first identify the functions we should process, then
+ // actually zap their returns. This is important because we can only do this
+ // if the address of the function isn't taken. In cases where a return is the
+ // last use of a function, the order of processing functions would affect
+ // whether other functions are optimizable.
+ SmallVector<ReturnInst*, 8> ReturnsToZap;
+
+ // TODO: Process multiple value ret instructions also.
+ const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
+ for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),
+ E = RV.end(); I != E; ++I) {
+ Function *F = I->first;
+ if (I->second.isOverdefined() || F->getReturnType()->isVoidTy())
+ continue;
+
+ // We can only do this if we know that nothing else can call the function.
+ if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F))
+ continue;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ if (!isa<UndefValue>(RI->getOperand(0)))
+ ReturnsToZap.push_back(RI);
+ }
+
+ // Zap all returns which we've identified as zap to change.
+ for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) {
+ Function *F = ReturnsToZap[i]->getParent()->getParent();
+ ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
+ }
+
+ // If we infered constant or undef values for globals variables, we can delete
+ // the global and any stores that remain to it.
+ const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals();
+ for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(),
+ E = TG.end(); I != E; ++I) {
+ GlobalVariable *GV = I->first;
+ assert(!I->second.isOverdefined() &&
+ "Overdefined values should have been taken out of the map!");
+ DEBUG(dbgs() << "Found that GV '" << GV->getName() << "' is constant!\n");
+ while (!GV->use_empty()) {
+ StoreInst *SI = cast<StoreInst>(GV->use_back());
+ SI->eraseFromParent();
+ }
+ M.getGlobalList().erase(GV);
+ ++IPNumGlobalConst;
+ }
+
+ return MadeChanges;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
new file mode 100644
index 0000000..cb03423
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -0,0 +1,118 @@
+//===-- Scalar.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the C bindings for libLLVMScalarOpts.a, which implements
+// several scalar transformations over the LLVM intermediate representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Transforms/Scalar.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createAggressiveDCEPass());
+}
+
+void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCFGSimplificationPass());
+}
+
+void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDeadStoreEliminationPass());
+}
+
+void LLVMAddGVNPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGVNPass());
+}
+
+void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIndVarSimplifyPass());
+}
+
+void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createInstructionCombiningPass());
+}
+
+void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createJumpThreadingPass());
+}
+
+void LLVMAddLICMPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLICMPass());
+}
+
+void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopDeletionPass());
+}
+
+void LLVMAddLoopIndexSplitPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopIndexSplitPass());
+}
+
+void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopRotatePass());
+}
+
+void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopUnrollPass());
+}
+
+void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopUnswitchPass());
+}
+
+void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createMemCpyOptPass());
+}
+
+void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPromoteMemoryToRegisterPass());
+}
+
+void LLVMAddReassociatePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createReassociatePass());
+}
+
+void LLVMAddSCCPPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createSCCPPass());
+}
+
+void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createScalarReplAggregatesPass());
+}
+
+void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
+ int Threshold) {
+ unwrap(PM)->add(createScalarReplAggregatesPass(Threshold));
+}
+
+void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createSimplifyLibCallsPass());
+}
+
+void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createTailCallEliminationPass());
+}
+
+void LLVMAddConstantPropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createConstantPropagationPass());
+}
+
+void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDemoteRegisterToMemoryPass());
+}
+
+void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createVerifierPass());
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
new file mode 100644
index 0000000..fee317d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -0,0 +1,1835 @@
+//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation implements the well known scalar replacement of
+// aggregates transformation. This xform breaks up alloca instructions of
+// aggregate type (structure or array) into individual alloca instructions for
+// each member (if possible). Then, if possible, it transforms the individual
+// alloca instructions into nice clean scalar SSA form.
+//
+// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
+// often interact, especially for C++ programs. As such, iterating between
+// SRoA, then Mem2Reg until we run out of things to promote works well.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scalarrepl"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumReplaced, "Number of allocas broken up");
+STATISTIC(NumPromoted, "Number of allocas promoted");
+STATISTIC(NumConverted, "Number of aggregates converted to scalar");
+STATISTIC(NumGlobals, "Number of allocas copied from constant global");
+
+namespace {
+ struct SROA : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ explicit SROA(signed T = -1) : FunctionPass(ID) {
+ if (T == -1)
+ SRThreshold = 128;
+ else
+ SRThreshold = T;
+ }
+
+ bool runOnFunction(Function &F);
+
+ bool performScalarRepl(Function &F);
+ bool performPromotion(Function &F);
+
+ // getAnalysisUsage - This pass does not require any passes, but we know it
+ // will not alter the CFG, so say so.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+ AU.setPreservesCFG();
+ }
+
+ private:
+ TargetData *TD;
+
+ /// DeadInsts - Keep track of instructions we have made dead, so that
+ /// we can remove them after we are done working.
+ SmallVector<Value*, 32> DeadInsts;
+
+ /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
+ /// information about the uses. All these fields are initialized to false
+ /// and set to true when something is learned.
+ struct AllocaInfo {
+ /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
+ bool isUnsafe : 1;
+
+ /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
+ bool isMemCpySrc : 1;
+
+ /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
+ bool isMemCpyDst : 1;
+
+ AllocaInfo()
+ : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {}
+ };
+
+ unsigned SRThreshold;
+
+ void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }
+
+ bool isSafeAllocaToScalarRepl(AllocaInst *AI);
+
+ void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ AllocaInfo &Info);
+ void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset,
+ AllocaInfo &Info);
+ void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
+ const Type *MemOpType, bool isStore, AllocaInfo &Info);
+ bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
+ uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
+ const Type *&IdxTy);
+
+ void DoScalarReplacement(AllocaInst *AI,
+ std::vector<AllocaInst*> &WorkList);
+ void DeleteDeadInstructions();
+
+ void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
+ AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
+
+ static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
+ };
+}
+
+char SROA::ID = 0;
+INITIALIZE_PASS(SROA, "scalarrepl",
+ "Scalar Replacement of Aggregates", false, false);
+
+// Public interface to the ScalarReplAggregates pass
+FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
+ return new SROA(Threshold);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Convert To Scalar Optimization.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
+/// optimization, which scans the uses of an alloca and determines if it can
+/// rewrite it in terms of a single new alloca that can be mem2reg'd.
+class ConvertToScalarInfo {
+ /// AllocaSize - The size of the alloca being considered.
+ unsigned AllocaSize;
+ const TargetData &TD;
+
+ /// IsNotTrivial - This is set to true if there is some access to the object
+ /// which means that mem2reg can't promote it.
+ bool IsNotTrivial;
+
+ /// VectorTy - This tracks the type that we should promote the vector to if
+ /// it is possible to turn it into a vector. This starts out null, and if it
+ /// isn't possible to turn into a vector type, it gets set to VoidTy.
+ const Type *VectorTy;
+
+ /// HadAVector - True if there is at least one vector access to the alloca.
+ /// We don't want to turn random arrays into vectors and use vector element
+ /// insert/extract, but if there are element accesses to something that is
+ /// also declared as a vector, we do want to promote to a vector.
+ bool HadAVector;
+
+public:
+ explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
+ : AllocaSize(Size), TD(td) {
+ IsNotTrivial = false;
+ VectorTy = 0;
+ HadAVector = false;
+ }
+
+ AllocaInst *TryConvert(AllocaInst *AI);
+
+private:
+ bool CanConvertToScalar(Value *V, uint64_t Offset);
+ void MergeInType(const Type *In, uint64_t Offset);
+ void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
+
+ Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
+ uint64_t Offset, IRBuilder<> &Builder);
+ Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
+ uint64_t Offset, IRBuilder<> &Builder);
+};
+} // end anonymous namespace.
+
+
+/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't
+/// allowed to form. We do this to avoid MMX types, which is a complete hack,
+/// but is required until the backend is fixed.
+static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) {
+ StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple());
+ if (!Triple.startswith("i386") &&
+ !Triple.startswith("x86_64"))
+ return false;
+
+ // Reject all the MMX vector types.
+ switch (VTy->getNumElements()) {
+ default: return false;
+ case 1: return VTy->getElementType()->isIntegerTy(64);
+ case 2: return VTy->getElementType()->isIntegerTy(32);
+ case 4: return VTy->getElementType()->isIntegerTy(16);
+ case 8: return VTy->getElementType()->isIntegerTy(8);
+ }
+}
+
+
+/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
+/// rewrite it to be a new alloca which is mem2reg'able. This returns the new
+/// alloca if possible or null if not.
+AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
+ // If we can't convert this scalar, or if mem2reg can trivially do it, bail
+ // out.
+ if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
+ return 0;
+
+ // If we were able to find a vector type that can handle this with
+ // insert/extract elements, and if there was at least one use that had
+ // a vector type, promote this to a vector. We don't want to promote
+ // random stuff that doesn't use vectors (e.g. <9 x double>) because then
+ // we just get a lot of insert/extracts. If at least one vector is
+ // involved, then we probably really do have a union of vector/array.
+ const Type *NewTy;
+ if (VectorTy && VectorTy->isVectorTy() && HadAVector &&
+ !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) {
+ DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
+ << *VectorTy << '\n');
+ NewTy = VectorTy; // Use the vector type.
+ } else {
+ DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
+ // Create and insert the integer alloca.
+ NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
+ }
+ AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
+ ConvertUsesToScalar(AI, NewAI, 0);
+ return NewAI;
+}
+
+/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
+/// so far at the offset specified by Offset (which is specified in bytes).
+///
+/// There are two cases we handle here:
+/// 1) A union of vector types of the same size and potentially its elements.
+/// Here we turn element accesses into insert/extract element operations.
+/// This promotes a <4 x float> with a store of float to the third element
+/// into a <4 x float> that uses insert element.
+/// 2) A fully general blob of memory, which we turn into some (potentially
+/// large) integer type with extract and insert operations where the loads
+/// and stores would mutate the memory. We mark this by setting VectorTy
+/// to VoidTy.
+void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
+ // If we already decided to turn this into a blob of integer memory, there is
+ // nothing to be done.
+ if (VectorTy && VectorTy->isVoidTy())
+ return;
+
+ // If this could be contributing to a vector, analyze it.
+
+ // If the In type is a vector that is the same size as the alloca, see if it
+ // matches the existing VecTy.
+ if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
+ // Remember if we saw a vector type.
+ HadAVector = true;
+
+ if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
+ // If we're storing/loading a vector of the right size, allow it as a
+ // vector. If this the first vector we see, remember the type so that
+ // we know the element size. If this is a subsequent access, ignore it
+ // even if it is a differing type but the same size. Worst case we can
+ // bitcast the resultant vectors.
+ if (VectorTy == 0)
+ VectorTy = VInTy;
+ return;
+ }
+ } else if (In->isFloatTy() || In->isDoubleTy() ||
+ (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
+ isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+ // If we're accessing something that could be an element of a vector, see
+ // if the implied vector agrees with what we already have and if Offset is
+ // compatible with it.
+ unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+ if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
+ (VectorTy == 0 ||
+ cast<VectorType>(VectorTy)->getElementType()
+ ->getPrimitiveSizeInBits()/8 == EltSize)) {
+ if (VectorTy == 0)
+ VectorTy = VectorType::get(In, AllocaSize/EltSize);
+ return;
+ }
+ }
+
+ // Otherwise, we have a case that we can't handle with an optimized vector
+ // form. We can still turn this into a large integer.
+ VectorTy = Type::getVoidTy(In->getContext());
+}
+
+/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
+/// its accesses to a single vector type, return true and set VecTy to
+/// the new type. If we could convert the alloca into a single promotable
+/// integer, return true but set VecTy to VoidTy. Further, if the use is not a
+/// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset
+/// is the current offset from the base of the alloca being analyzed.
+///
+/// If we see at least one access to the value that is as a vector type, set the
+/// SawVec flag.
+bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ // Don't break volatile loads.
+ if (LI->isVolatile())
+ return false;
+ MergeInType(LI->getType(), Offset);
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Storing the pointer, not into the value?
+ if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+ MergeInType(SI->getOperand(0)->getType(), Offset);
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ if (!CanConvertToScalar(BCI, Offset))
+ return false;
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ // If this is a GEP with a variable indices, we can't handle it.
+ if (!GEP->hasAllConstantIndices())
+ return false;
+
+ // Compute the offset that this GEP adds to the pointer.
+ SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ // See if all uses can be converted.
+ if (!CanConvertToScalar(GEP, Offset+GEPOffset))
+ return false;
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ continue;
+ }
+
+ // If this is a constant sized memset of a constant value (e.g. 0) we can
+ // handle it.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ // Store of constant value and constant size.
+ if (!isa<ConstantInt>(MSI->getValue()) ||
+ !isa<ConstantInt>(MSI->getLength()))
+ return false;
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ continue;
+ }
+
+ // If this is a memcpy or memmove into or out of the whole allocation, we
+ // can handle it like a load or store of the scalar type.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
+ if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
+ return false;
+
+ IsNotTrivial = true; // Can't be mem2reg'd.
+ continue;
+ }
+
+ // Otherwise, we cannot handle this!
+ return false;
+ }
+
+ return true;
+}
+
+/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
+/// directly. This happens when we are converting an "integer union" to a
+/// single integer scalar, or when we are converting a "vector union" to a
+/// vector with insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right. By the end of this, there should be no uses of Ptr.
+void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
+ uint64_t Offset) {
+ while (!Ptr->use_empty()) {
+ Instruction *User = cast<Instruction>(Ptr->use_back());
+
+ if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
+ ConvertUsesToScalar(CI, NewAI, Offset);
+ CI->eraseFromParent();
+ continue;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ // Compute the offset that this GEP adds to the pointer.
+ SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
+ GEP->eraseFromParent();
+ continue;
+ }
+
+ IRBuilder<> Builder(User->getParent(), User);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ // The load is a bit extract from NewAI shifted right by Offset bits.
+ Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
+ Value *NewLoadVal
+ = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
+ LI->replaceAllUsesWith(NewLoadVal);
+ LI->eraseFromParent();
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ assert(SI->getOperand(0) != Ptr && "Consistency error!");
+ Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
+ Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
+ Builder);
+ Builder.CreateStore(New, NewAI);
+ SI->eraseFromParent();
+
+ // If the load we just inserted is now dead, then the inserted store
+ // overwrote the entire thing.
+ if (Old->use_empty())
+ Old->eraseFromParent();
+ continue;
+ }
+
+ // If this is a constant sized memset of a constant value (e.g. 0) we can
+ // transform it into a store of the expanded constant value.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ assert(MSI->getRawDest() == Ptr && "Consistency error!");
+ unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ if (NumBytes != 0) {
+ unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
+
+ // Compute the value replicated the right number of times.
+ APInt APVal(NumBytes*8, Val);
+
+ // Splat the value if non-zero.
+ if (Val)
+ for (unsigned i = 1; i != NumBytes; ++i)
+ APVal |= APVal << 8;
+
+ Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
+ Value *New = ConvertScalar_InsertValue(
+ ConstantInt::get(User->getContext(), APVal),
+ Old, Offset, Builder);
+ Builder.CreateStore(New, NewAI);
+
+ // If the load we just inserted is now dead, then the memset overwrote
+ // the entire thing.
+ if (Old->use_empty())
+ Old->eraseFromParent();
+ }
+ MSI->eraseFromParent();
+ continue;
+ }
+
+ // If this is a memcpy or memmove into or out of the whole allocation, we
+ // can handle it like a load or store of the scalar type.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ assert(Offset == 0 && "must be store to start of alloca");
+
+ // If the source and destination are both to the same alloca, then this is
+ // a noop copy-to-self, just delete it. Otherwise, emit a load and store
+ // as appropriate.
+ AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0));
+
+ if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) {
+ // Dest must be OrigAI, change this to be a load from the original
+ // pointer (bitcasted), then a store to our new alloca.
+ assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
+ Value *SrcPtr = MTI->getSource();
+ SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());
+
+ LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
+ SrcVal->setAlignment(MTI->getAlignment());
+ Builder.CreateStore(SrcVal, NewAI);
+ } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) {
+ // Src must be OrigAI, change this to be a load from NewAI then a store
+ // through the original dest pointer (bitcasted).
+ assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
+ LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
+
+ Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
+ StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
+ NewStore->setAlignment(MTI->getAlignment());
+ } else {
+ // Noop transfer. Src == Dst
+ }
+
+ MTI->eraseFromParent();
+ continue;
+ }
+
+ llvm_unreachable("Unsupported operation!");
+ }
+}
+
+/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
+/// or vector value FromVal, extracting the bits from the offset specified by
+/// Offset. This returns the value, which is of type ToType.
+///
+/// This happens when we are converting an "integer union" to a single
+/// integer scalar, or when we are converting a "vector union" to a vector with
+/// insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right.
+Value *ConvertToScalarInfo::
+ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
+ uint64_t Offset, IRBuilder<> &Builder) {
+ // If the load is of the whole new alloca, no conversion is needed.
+ if (FromVal->getType() == ToType && Offset == 0)
+ return FromVal;
+
+ // If the result alloca is a vector type, this is either an element
+ // access or a bitcast to another vector type of the same size.
+ if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
+ if (ToType->isVectorTy())
+ return Builder.CreateBitCast(FromVal, ToType, "tmp");
+
+ // Otherwise it must be an element access.
+ unsigned Elt = 0;
+ if (Offset) {
+ unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ }
+ // Return the element extracted out of it.
+ Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
+ Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
+ if (V->getType() != ToType)
+ V = Builder.CreateBitCast(V, ToType, "tmp");
+ return V;
+ }
+
+ // If ToType is a first class aggregate, extract out each of the pieces and
+ // use insertvalue's to form the FCA.
+ if (const StructType *ST = dyn_cast<StructType>(ToType)) {
+ const StructLayout &Layout = *TD.getStructLayout(ST);
+ Value *Res = UndefValue::get(ST);
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+ Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
+ Offset+Layout.getElementOffsetInBits(i),
+ Builder);
+ Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ }
+ return Res;
+ }
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
+ Value *Res = UndefValue::get(AT);
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
+ Offset+i*EltSize, Builder);
+ Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ }
+ return Res;
+ }
+
+ // Otherwise, this must be a union that was converted to an integer value.
+ const IntegerType *NTy = cast<IntegerType>(FromVal->getType());
+
+ // If this is a big-endian system and the load is narrower than the
+ // full alloca type, we need to do a shift to get the right bits.
+ int ShAmt = 0;
+ if (TD.isBigEndian()) {
+ // On big-endian machines, the lowest bit is stored at the bit offset
+ // from the pointer given by getTypeStoreSizeInBits. This matters for
+ // integers with a bitwidth that is not a multiple of 8.
+ ShAmt = TD.getTypeStoreSizeInBits(NTy) -
+ TD.getTypeStoreSizeInBits(ToType) - Offset;
+ } else {
+ ShAmt = Offset;
+ }
+
+ // Note: we support negative bitwidths (with shl) which are not defined.
+ // We do this to support (f.e.) loads off the end of a structure where
+ // only some bits are used.
+ if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
+ FromVal = Builder.CreateLShr(FromVal,
+ ConstantInt::get(FromVal->getType(),
+ ShAmt), "tmp");
+ else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
+ FromVal = Builder.CreateShl(FromVal,
+ ConstantInt::get(FromVal->getType(),
+ -ShAmt), "tmp");
+
+ // Finally, unconditionally truncate the integer to the right width.
+ unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
+ if (LIBitWidth < NTy->getBitWidth())
+ FromVal =
+ Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
+ LIBitWidth), "tmp");
+ else if (LIBitWidth > NTy->getBitWidth())
+ FromVal =
+ Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
+ LIBitWidth), "tmp");
+
+ // If the result is an integer, this is a trunc or bitcast.
+ if (ToType->isIntegerTy()) {
+ // Should be done.
+ } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
+ // Just do a bitcast, we know the sizes match up.
+ FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
+ } else {
+ // Otherwise must be a pointer.
+ FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
+ }
+ assert(FromVal->getType() == ToType && "Didn't convert right?");
+ return FromVal;
+}
+
+/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
+/// or vector value "Old" at the offset specified by Offset.
+///
+/// This happens when we are converting an "integer union" to a
+/// single integer scalar, or when we are converting a "vector union" to a
+/// vector with insert/extractelement instructions.
+///
+/// Offset is an offset from the original alloca, in bits that need to be
+/// shifted to the right.
+Value *ConvertToScalarInfo::
+ConvertScalar_InsertValue(Value *SV, Value *Old,
+ uint64_t Offset, IRBuilder<> &Builder) {
+ // Convert the stored type to the actual type, shift it left to insert
+ // then 'or' into place.
+ const Type *AllocaType = Old->getType();
+ LLVMContext &Context = Old->getContext();
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
+ uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
+ uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
+
+ // Changing the whole vector with memset or with an access of a different
+ // vector type?
+ if (ValSize == VecSize)
+ return Builder.CreateBitCast(SV, AllocaType, "tmp");
+
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+
+ // Must be an element insertion.
+ unsigned Elt = Offset/EltSize;
+
+ if (SV->getType() != VTy->getElementType())
+ SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
+
+ SV = Builder.CreateInsertElement(Old, SV,
+ ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
+ "tmp");
+ return SV;
+ }
+
+ // If SV is a first-class aggregate value, insert each value recursively.
+ if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
+ const StructLayout &Layout = *TD.getStructLayout(ST);
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+ Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Old = ConvertScalar_InsertValue(Elt, Old,
+ Offset+Layout.getElementOffsetInBits(i),
+ Builder);
+ }
+ return Old;
+ }
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
+ }
+ return Old;
+ }
+
+ // If SV is a float, convert it to the appropriate integer type.
+ // If it is a pointer, do the same.
+ unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
+ unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
+ unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
+ unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
+ if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
+ SV = Builder.CreateBitCast(SV,
+ IntegerType::get(SV->getContext(),SrcWidth), "tmp");
+ else if (SV->getType()->isPointerTy())
+ SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp");
+
+ // Zero extend or truncate the value if needed.
+ if (SV->getType() != AllocaType) {
+ if (SV->getType()->getPrimitiveSizeInBits() <
+ AllocaType->getPrimitiveSizeInBits())
+ SV = Builder.CreateZExt(SV, AllocaType, "tmp");
+ else {
+ // Truncation may be needed if storing more than the alloca can hold
+ // (undefined behavior).
+ SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
+ SrcWidth = DestWidth;
+ SrcStoreWidth = DestStoreWidth;
+ }
+ }
+
+ // If this is a big-endian system and the store is narrower than the
+ // full alloca type, we need to do a shift to get the right bits.
+ int ShAmt = 0;
+ if (TD.isBigEndian()) {
+ // On big-endian machines, the lowest bit is stored at the bit offset
+ // from the pointer given by getTypeStoreSizeInBits. This matters for
+ // integers with a bitwidth that is not a multiple of 8.
+ ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
+ } else {
+ ShAmt = Offset;
+ }
+
+ // Note: we support negative bitwidths (with shr) which are not defined.
+ // We do this to support (f.e.) stores off the end of a structure where
+ // only some bits in the structure are set.
+ APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
+ if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
+ SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
+ ShAmt), "tmp");
+ Mask <<= ShAmt;
+ } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
+ SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
+ -ShAmt), "tmp");
+ Mask = Mask.lshr(-ShAmt);
+ }
+
+ // Mask out the bits we are about to insert from the old value, and or
+ // in the new bits.
+ if (SrcWidth != DestWidth) {
+ assert(DestWidth > SrcWidth);
+ Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
+ SV = Builder.CreateOr(Old, SV, "ins");
+ }
+ return SV;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SRoA Driver
+//===----------------------------------------------------------------------===//
+
+
+bool SROA::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ bool Changed = performPromotion(F);
+
+ // FIXME: ScalarRepl currently depends on TargetData more than it
+ // theoretically needs to. It should be refactored in order to support
+ // target-independent IR. Until this is done, just skip the actual
+ // scalar-replacement portion of this pass.
+ if (!TD) return Changed;
+
+ while (1) {
+ bool LocalChange = performScalarRepl(F);
+ if (!LocalChange) break; // No need to repromote if no scalarrepl
+ Changed = true;
+ LocalChange = performPromotion(F);
+ if (!LocalChange) break; // No need to re-scalarrepl if no promotion
+ }
+
+ return Changed;
+}
+
+
+bool SROA::performPromotion(Function &F) {
+ std::vector<AllocaInst*> Allocas;
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
+
+ BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+
+ bool Changed = false;
+
+ while (1) {
+ Allocas.clear();
+
+ // Find allocas that are safe to promote, by looking at all instructions in
+ // the entry node
+ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
+ if (isAllocaPromotable(AI))
+ Allocas.push_back(AI);
+
+ if (Allocas.empty()) break;
+
+ PromoteMemToReg(Allocas, DT, DF);
+ NumPromoted += Allocas.size();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
+/// SROA. It must be a struct or array type with a small number of elements.
+static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
+ const Type *T = AI->getAllocatedType();
+ // Do not promote any struct into more than 32 separate vars.
+ if (const StructType *ST = dyn_cast<StructType>(T))
+ return ST->getNumElements() <= 32;
+ // Arrays are much less likely to be safe for SROA; only consider
+ // them if they are very small.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(T))
+ return AT->getNumElements() <= 8;
+ return false;
+}
+
+
+// performScalarRepl - This algorithm is a simple worklist driven algorithm,
+// which runs on all of the malloc/alloca instructions in the function, removing
+// them if they are only used by getelementptr instructions.
+//
+bool SROA::performScalarRepl(Function &F) {
+ std::vector<AllocaInst*> WorkList;
+
+ // Scan the entry basic block, adding allocas to the worklist.
+ BasicBlock &BB = F.getEntryBlock();
+ for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
+ if (AllocaInst *A = dyn_cast<AllocaInst>(I))
+ WorkList.push_back(A);
+
+ // Process the worklist
+ bool Changed = false;
+ while (!WorkList.empty()) {
+ AllocaInst *AI = WorkList.back();
+ WorkList.pop_back();
+
+ // Handle dead allocas trivially. These can be formed by SROA'ing arrays
+ // with unused elements.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // If this alloca is impossible for us to promote, reject it early.
+ if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
+ continue;
+
+ // Check to see if this allocation is only modified by a memcpy/memmove from
+ // a constant global. If this is the case, we can change all users to use
+ // the constant global instead. This is commonly produced by the CFE by
+ // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
+ // is only subsequently read.
+ if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
+ DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
+ DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n');
+ Constant *TheSrc = cast<Constant>(TheCopy->getSource());
+ AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
+ TheCopy->eraseFromParent(); // Don't mutate the global.
+ AI->eraseFromParent();
+ ++NumGlobals;
+ Changed = true;
+ continue;
+ }
+
+ // Check to see if we can perform the core SROA transformation. We cannot
+ // transform the allocation instruction if it is an array allocation
+ // (allocations OF arrays are ok though), and an allocation of a scalar
+ // value cannot be decomposed at all.
+ uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
+
+ // Do not promote [0 x %struct].
+ if (AllocaSize == 0) continue;
+
+ // Do not promote any struct whose size is too big.
+ if (AllocaSize > SRThreshold) continue;
+
+ // If the alloca looks like a good candidate for scalar replacement, and if
+ // all its users can be transformed, then split up the aggregate into its
+ // separate elements.
+ if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
+ DoScalarReplacement(AI, WorkList);
+ Changed = true;
+ continue;
+ }
+
+ // If we can turn this aggregate value (potentially with casts) into a
+ // simple scalar value that can be mem2reg'd into a register value.
+ // IsNotTrivial tracks whether this is something that mem2reg could have
+ // promoted itself. If so, we don't want to transform it needlessly. Note
+ // that we can't just check based on the type: the alloca may be of an i32
+ // but that has pointer arithmetic to set byte 3 of it or something.
+ if (AllocaInst *NewAI =
+ ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) {
+ NewAI->takeName(AI);
+ AI->eraseFromParent();
+ ++NumConverted;
+ Changed = true;
+ continue;
+ }
+
+ // Otherwise, couldn't process this alloca.
+ }
+
+ return Changed;
+}
+
+/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
+/// predicate, do SROA now.
+void SROA::DoScalarReplacement(AllocaInst *AI,
+ std::vector<AllocaInst*> &WorkList) {
+ DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
+ SmallVector<AllocaInst*, 32> ElementAllocas;
+ if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ ElementAllocas.reserve(ST->getNumContainedTypes());
+ for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
+ AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
+ AI->getAlignment(),
+ AI->getName() + "." + Twine(i), AI);
+ ElementAllocas.push_back(NA);
+ WorkList.push_back(NA); // Add to worklist for recursive processing
+ }
+ } else {
+ const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
+ ElementAllocas.reserve(AT->getNumElements());
+ const Type *ElTy = AT->getElementType();
+ for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+ AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
+ AI->getName() + "." + Twine(i), AI);
+ ElementAllocas.push_back(NA);
+ WorkList.push_back(NA); // Add to worklist for recursive processing
+ }
+ }
+
+ // Now that we have created the new alloca instructions, rewrite all the
+ // uses of the old alloca.
+ RewriteForScalarRepl(AI, AI, 0, ElementAllocas);
+
+ // Now erase any instructions that were made dead while rewriting the alloca.
+ DeleteDeadInstructions();
+ AI->eraseFromParent();
+
+ ++NumReplaced;
+}
+
+/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
+/// recursively including all their operands that become trivially dead.
+void SROA::DeleteDeadInstructions() {
+ while (!DeadInsts.empty()) {
+ Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
+
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ // Zero out the operand and see if it becomes trivially dead.
+ // (But, don't add allocas to the dead instruction list -- they are
+ // already on the worklist and will be deleted separately.)
+ *OI = 0;
+ if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
+ DeadInsts.push_back(U);
+ }
+
+ I->eraseFromParent();
+ }
+}
+
+/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
+/// performing scalar replacement of alloca AI. The results are flagged in
+/// the Info parameter. Offset indicates the position within AI that is
+/// referenced by this instruction.
+void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ AllocaInfo &Info) {
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ isSafeForScalarRepl(BC, AI, Offset, Info);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ uint64_t GEPOffset = Offset;
+ isSafeGEP(GEPI, AI, GEPOffset, Info);
+ if (!Info.isUnsafe)
+ isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
+ if (Length)
+ isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
+ UI.getOperandNo() == 0, Info);
+ else
+ MarkUnsafe(Info);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (!LI->isVolatile()) {
+ const Type *LIType = LI->getType();
+ isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info);
+ } else
+ MarkUnsafe(Info);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Store is ok if storing INTO the pointer, not storing the pointer
+ if (!SI->isVolatile() && SI->getOperand(0) != I) {
+ const Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info);
+ } else
+ MarkUnsafe(Info);
+ } else {
+ DEBUG(errs() << " Transformation preventing inst: " << *User << '\n');
+ MarkUnsafe(Info);
+ }
+ if (Info.isUnsafe) return;
+ }
+}
+
+/// isSafeGEP - Check if a GEP instruction can be handled for scalar
+/// replacement. It is safe when all the indices are constant, in-bounds
+/// references, and when the resulting offset corresponds to an element within
+/// the alloca type. The results are flagged in the Info parameter. Upon
+/// return, Offset is adjusted as specified by the GEP indices.
+void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
+ uint64_t &Offset, AllocaInfo &Info) {
+ gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
+ if (GEPIt == E)
+ return;
+
+ // Walk through the GEP type indices, checking the types that this indexes
+ // into.
+ for (; GEPIt != E; ++GEPIt) {
+ // Ignore struct elements, no extra checking needed for these.
+ if ((*GEPIt)->isStructTy())
+ continue;
+
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
+ if (!IdxVal)
+ return MarkUnsafe(Info);
+ }
+
+ // Compute the offset due to this GEP and check if the alloca has a
+ // component element at that offset.
+ SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0))
+ MarkUnsafe(Info);
+}
+
+/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
+/// alloca or has an offset and size that corresponds to a component element
+/// within it. The offset checked here may have been formed from a GEP with a
+/// pointer bitcasted to a different type.
+void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
+ const Type *MemOpType, bool isStore,
+ AllocaInfo &Info) {
+ // Check if this is a load/store of the entire alloca.
+ if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) {
+ bool UsesAggregateType = (MemOpType == AI->getAllocatedType());
+ // This is safe for MemIntrinsics (where MemOpType is 0), integer types
+ // (which are essentially the same as the MemIntrinsics, especially with
+ // regard to copying padding between elements), or references using the
+ // aggregate type of the alloca.
+ if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) {
+ if (!UsesAggregateType) {
+ if (isStore)
+ Info.isMemCpyDst = true;
+ else
+ Info.isMemCpySrc = true;
+ }
+ return;
+ }
+ }
+ // Check if the offset/size correspond to a component within the alloca type.
+ const Type *T = AI->getAllocatedType();
+ if (TypeHasComponent(T, Offset, MemSize))
+ return;
+
+ return MarkUnsafe(Info);
+}
+
+/// TypeHasComponent - Return true if T has a component type with the
+/// specified offset and size. If Size is zero, do not check the size.
+bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
+ const Type *EltTy;
+ uint64_t EltSize;
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ const StructLayout *Layout = TD->getStructLayout(ST);
+ unsigned EltIdx = Layout->getElementContainingOffset(Offset);
+ EltTy = ST->getContainedType(EltIdx);
+ EltSize = TD->getTypeAllocSize(EltTy);
+ Offset -= Layout->getElementOffset(EltIdx);
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ EltTy = AT->getElementType();
+ EltSize = TD->getTypeAllocSize(EltTy);
+ if (Offset >= AT->getNumElements() * EltSize)
+ return false;
+ Offset %= EltSize;
+ } else {
+ return false;
+ }
+ if (Offset == 0 && (Size == 0 || EltSize == Size))
+ return true;
+ // Check if the component spans multiple elements.
+ if (Offset + Size > EltSize)
+ return false;
+ return TypeHasComponent(EltTy, Offset, Size);
+}
+
+/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
+/// the instruction I, which references it, to use the separate elements.
+/// Offset indicates the position within AI that is referenced by this
+/// instruction.
+void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ RewriteBitCast(BC, AI, Offset, NewElts);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ RewriteGEP(GEPI, AI, Offset, NewElts);
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
+ uint64_t MemSize = Length->getZExtValue();
+ if (Offset == 0 &&
+ MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
+ RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
+ // Otherwise the intrinsic can only touch a single element and the
+ // address operand will be updated, so nothing else needs to be done.
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ const Type *LIType = LI->getType();
+ if (LIType == AI->getAllocatedType()) {
+ // Replace:
+ // %res = load { i32, i32 }* %alloc
+ // with:
+ // %load.0 = load i32* %alloc.0
+ // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
+ // %load.1 = load i32* %alloc.1
+ // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
+ // (Also works for arrays instead of structs)
+ Value *Insert = UndefValue::get(LIType);
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ Value *Load = new LoadInst(NewElts[i], "load", LI);
+ Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
+ }
+ LI->replaceAllUsesWith(Insert);
+ DeadInsts.push_back(LI);
+ } else if (LIType->isIntegerTy() &&
+ TD->getTypeAllocSize(LIType) ==
+ TD->getTypeAllocSize(AI->getAllocatedType())) {
+ // If this is a load of the entire alloca to an integer, rewrite it.
+ RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ Value *Val = SI->getOperand(0);
+ const Type *SIType = Val->getType();
+ if (SIType == AI->getAllocatedType()) {
+ // Replace:
+ // store { i32, i32 } %val, { i32, i32 }* %alloc
+ // with:
+ // %val.0 = extractvalue { i32, i32 } %val, 0
+ // store i32 %val.0, i32* %alloc.0
+ // %val.1 = extractvalue { i32, i32 } %val, 1
+ // store i32 %val.1, i32* %alloc.1
+ // (Also works for arrays instead of structs)
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
+ new StoreInst(Extract, NewElts[i], SI);
+ }
+ DeadInsts.push_back(SI);
+ } else if (SIType->isIntegerTy() &&
+ TD->getTypeAllocSize(SIType) ==
+ TD->getTypeAllocSize(AI->getAllocatedType())) {
+ // If this is a store of the entire alloca from an integer, rewrite it.
+ RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
+ }
+ }
+ }
+}
+
+/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
+/// and recursively continue updating all of its uses.
+void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ RewriteForScalarRepl(BC, AI, Offset, NewElts);
+ if (BC->getOperand(0) != AI)
+ return;
+
+ // The bitcast references the original alloca. Replace its uses with
+ // references to the first new element alloca.
+ Instruction *Val = NewElts[0];
+ if (Val->getType() != BC->getDestTy()) {
+ Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
+ Val->takeName(BC);
+ }
+ BC->replaceAllUsesWith(Val);
+ DeadInsts.push_back(BC);
+}
+
+/// FindElementAndOffset - Return the index of the element containing Offset
+/// within the specified type, which must be either a struct or an array.
+/// Sets T to the type of the element and Offset to the offset within that
+/// element. IdxTy is set to the type of the index result to be used in a
+/// GEP instruction.
+uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
+ const Type *&IdxTy) {
+ uint64_t Idx = 0;
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ const StructLayout *Layout = TD->getStructLayout(ST);
+ Idx = Layout->getElementContainingOffset(Offset);
+ T = ST->getContainedType(Idx);
+ Offset -= Layout->getElementOffset(Idx);
+ IdxTy = Type::getInt32Ty(T->getContext());
+ return Idx;
+ }
+ const ArrayType *AT = cast<ArrayType>(T);
+ T = AT->getElementType();
+ uint64_t EltSize = TD->getTypeAllocSize(T);
+ Idx = Offset / EltSize;
+ Offset -= Idx * EltSize;
+ IdxTy = Type::getInt64Ty(T->getContext());
+ return Idx;
+}
+
+/// RewriteGEP - Check if this GEP instruction moves the pointer across
+/// elements of the alloca that are being split apart, and if so, rewrite
+/// the GEP to be relative to the new element.
+void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ uint64_t OldOffset = Offset;
+ SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
+ Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
+ &Indices[0], Indices.size());
+
+ RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
+
+ const Type *T = AI->getAllocatedType();
+ const Type *IdxTy;
+ uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
+ if (GEPI->getOperand(0) == AI)
+ OldIdx = ~0ULL; // Force the GEP to be rewritten.
+
+ T = AI->getAllocatedType();
+ uint64_t EltOffset = Offset;
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+
+ // If this GEP does not move the pointer across elements of the alloca
+ // being split, then it does not needs to be rewritten.
+ if (Idx == OldIdx)
+ return;
+
+ const Type *i32Ty = Type::getInt32Ty(AI->getContext());
+ SmallVector<Value*, 8> NewArgs;
+ NewArgs.push_back(Constant::getNullValue(i32Ty));
+ while (EltOffset != 0) {
+ uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
+ NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
+ }
+ Instruction *Val = NewElts[Idx];
+ if (NewArgs.size() > 1) {
+ Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
+ NewArgs.end(), "", GEPI);
+ Val->takeName(GEPI);
+ }
+ if (Val->getType() != GEPI->getType())
+ Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
+ GEPI->replaceAllUsesWith(Val);
+ DeadInsts.push_back(GEPI);
+}
+
+/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
+/// Rewrite it to copy or set the elements of the scalarized memory.
+void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
+ AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ // If this is a memcpy/memmove, construct the other pointer as the
+ // appropriate type. The "Other" pointer is the pointer that goes to memory
+ // that doesn't have anything to do with the alloca that we are promoting. For
+ // memset, this Value* stays null.
+ Value *OtherPtr = 0;
+ unsigned MemAlignment = MI->getAlignment();
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
+ if (Inst == MTI->getRawDest())
+ OtherPtr = MTI->getRawSource();
+ else {
+ assert(Inst == MTI->getRawSource());
+ OtherPtr = MTI->getRawDest();
+ }
+ }
+
+ // If there is an other pointer, we want to convert it to the same pointer
+ // type as AI has, so we can GEP through it safely.
+ if (OtherPtr) {
+ unsigned AddrSpace =
+ cast<PointerType>(OtherPtr->getType())->getAddressSpace();
+
+ // Remove bitcasts and all-zero GEPs from OtherPtr. This is an
+ // optimization, but it's also required to detect the corner case where
+ // both pointer operands are referencing the same memory, and where
+ // OtherPtr may be a bitcast or GEP that currently being rewritten. (This
+ // function is only called for mem intrinsics that access the whole
+ // aggregate, so non-zero GEPs are not an issue here.)
+ OtherPtr = OtherPtr->stripPointerCasts();
+
+ // Copying the alloca to itself is a no-op: just delete it.
+ if (OtherPtr == AI || OtherPtr == NewElts[0]) {
+ // This code will run twice for a no-op memcpy -- once for each operand.
+ // Put only one reference to MI on the DeadInsts list.
+ for (SmallVector<Value*, 32>::const_iterator I = DeadInsts.begin(),
+ E = DeadInsts.end(); I != E; ++I)
+ if (*I == MI) return;
+ DeadInsts.push_back(MI);
+ return;
+ }
+
+ // If the pointer is not the right type, insert a bitcast to the right
+ // type.
+ const Type *NewTy =
+ PointerType::get(AI->getType()->getElementType(), AddrSpace);
+
+ if (OtherPtr->getType() != NewTy)
+ OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
+ }
+
+ // Process each element of the aggregate.
+ Value *TheFn = MI->getCalledValue();
+ const Type *BytePtrTy = MI->getRawDest()->getType();
+ bool SROADest = MI->getRawDest() == Inst;
+
+ Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // If this is a memcpy/memmove, emit a GEP of the other element address.
+ Value *OtherElt = 0;
+ unsigned OtherEltAlign = MemAlignment;
+
+ if (OtherPtr) {
+ Value *Idx[2] = { Zero,
+ ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
+ OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
+ OtherPtr->getName()+"."+Twine(i),
+ MI);
+ uint64_t EltOffset;
+ const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
+ const Type *OtherTy = OtherPtrTy->getElementType();
+ if (const StructType *ST = dyn_cast<StructType>(OtherTy)) {
+ EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
+ } else {
+ const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
+ EltOffset = TD->getTypeAllocSize(EltTy)*i;
+ }
+
+ // The alignment of the other pointer is the guaranteed alignment of the
+ // element, which is affected by both the known alignment of the whole
+ // mem intrinsic and the alignment of the element. If the alignment of
+ // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the
+ // known alignment is just 4 bytes.
+ OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
+ }
+
+ Value *EltPtr = NewElts[i];
+ const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
+
+ // If we got down to a scalar, insert a load or store as appropriate.
+ if (EltTy->isSingleValueType()) {
+ if (isa<MemTransferInst>(MI)) {
+ if (SROADest) {
+ // From Other to Alloca.
+ Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
+ new StoreInst(Elt, EltPtr, MI);
+ } else {
+ // From Alloca to Other.
+ Value *Elt = new LoadInst(EltPtr, "tmp", MI);
+ new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
+ }
+ continue;
+ }
+ assert(isa<MemSetInst>(MI));
+
+ // If the stored element is zero (common case), just store a null
+ // constant.
+ Constant *StoreVal;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
+ if (CI->isZero()) {
+ StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
+ } else {
+ // If EltTy is a vector type, get the element type.
+ const Type *ValTy = EltTy->getScalarType();
+
+ // Construct an integer with the right value.
+ unsigned EltSize = TD->getTypeSizeInBits(ValTy);
+ APInt OneVal(EltSize, CI->getZExtValue());
+ APInt TotalVal(OneVal);
+ // Set each byte.
+ for (unsigned i = 0; 8*i < EltSize; ++i) {
+ TotalVal = TotalVal.shl(8);
+ TotalVal |= OneVal;
+ }
+
+ // Convert the integer value to the appropriate type.
+ StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
+ if (ValTy->isPointerTy())
+ StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
+ else if (ValTy->isFloatingPointTy())
+ StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
+ assert(StoreVal->getType() == ValTy && "Type mismatch!");
+
+ // If the requested value was a vector constant, create it.
+ if (EltTy != ValTy) {
+ unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
+ SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
+ StoreVal = ConstantVector::get(&Elts[0], NumElts);
+ }
+ }
+ new StoreInst(StoreVal, EltPtr, MI);
+ continue;
+ }
+ // Otherwise, if we're storing a byte variable, use a memset call for
+ // this element.
+ }
+
+ // Cast the element pointer to BytePtrTy.
+ if (EltPtr->getType() != BytePtrTy)
+ EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
+
+ // Cast the other pointer (if we have one) to BytePtrTy.
+ if (OtherElt && OtherElt->getType() != BytePtrTy) {
+ // Preserve address space of OtherElt
+ const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType());
+ const PointerType* PTy = cast<PointerType>(BytePtrTy);
+ if (OtherPTy->getElementType() != PTy->getElementType()) {
+ Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
+ OtherPTy->getAddressSpace());
+ OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
+ OtherElt->getNameStr(), MI);
+ }
+ }
+
+ unsigned EltSize = TD->getTypeAllocSize(EltTy);
+
+ // Finally, insert the meminst for this element.
+ if (isa<MemTransferInst>(MI)) {
+ Value *Ops[] = {
+ SROADest ? EltPtr : OtherElt, // Dest ptr
+ SROADest ? OtherElt : EltPtr, // Src ptr
+ ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
+ // Align
+ ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
+ MI->getVolatileCst()
+ };
+ // In case we fold the address space overloaded memcpy of A to B
+ // with memcpy of B to C, change the function to be a memcpy of A to C.
+ const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(),
+ Ops[2]->getType() };
+ Module *M = MI->getParent()->getParent()->getParent();
+ TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3);
+ CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
+ } else {
+ assert(isa<MemSetInst>(MI));
+ Value *Ops[] = {
+ EltPtr, MI->getArgOperand(1), // Dest, Value,
+ ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
+ Zero, // Align
+ ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
+ };
+ const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
+ Module *M = MI->getParent()->getParent()->getParent();
+ TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
+ CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
+ }
+ }
+ DeadInsts.push_back(MI);
+}
+
+/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
+/// overwrites the entire allocation. Extract out the pieces of the stored
+/// integer and store them individually.
+void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts){
+ // Extract each element out of the integer according to its structure offset
+ // and store the element value to the individual alloca.
+ Value *SrcVal = SI->getOperand(0);
+ const Type *AllocaEltTy = AI->getAllocatedType();
+ uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+
+ // Handle tail padding by extending the operand
+ if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+ SrcVal = new ZExtInst(SrcVal,
+ IntegerType::get(SI->getContext(), AllocaSizeBits),
+ "", SI);
+
+ DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
+ << '\n');
+
+ // There are two forms here: AI could be an array or struct. Both cases
+ // have different ways to compute the element offset.
+ if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ const StructLayout *Layout = TD->getStructLayout(EltSTy);
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Get the number of bits to shift SrcVal to get the value.
+ const Type *FieldTy = EltSTy->getElementType(i);
+ uint64_t Shift = Layout->getElementOffsetInBits(i);
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
+
+ Value *EltVal = SrcVal;
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
+ EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
+ "sroa.store.elt", SI);
+ }
+
+ // Truncate down to an integer of the right size.
+ uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (FieldSizeBits == 0) continue;
+
+ if (FieldSizeBits != AllocaSizeBits)
+ EltVal = new TruncInst(EltVal,
+ IntegerType::get(SI->getContext(), FieldSizeBits),
+ "", SI);
+ Value *DestField = NewElts[i];
+ if (EltVal->getType() == FieldTy) {
+ // Storing to an integer field of this size, just do it.
+ } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
+ // Bitcast to the right element type (for fp/vector values).
+ EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
+ } else {
+ // Otherwise, bitcast the dest pointer (for aggregates).
+ DestField = new BitCastInst(DestField,
+ PointerType::getUnqual(EltVal->getType()),
+ "", SI);
+ }
+ new StoreInst(EltVal, DestField, SI);
+ }
+
+ } else {
+ const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
+ const Type *ArrayEltTy = ATy->getElementType();
+ uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
+ uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
+
+ uint64_t Shift;
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-ElementOffset;
+ else
+ Shift = 0;
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (ElementSizeBits == 0) continue;
+
+ Value *EltVal = SrcVal;
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
+ EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
+ "sroa.store.elt", SI);
+ }
+
+ // Truncate down to an integer of the right size.
+ if (ElementSizeBits != AllocaSizeBits)
+ EltVal = new TruncInst(EltVal,
+ IntegerType::get(SI->getContext(),
+ ElementSizeBits),"",SI);
+ Value *DestField = NewElts[i];
+ if (EltVal->getType() == ArrayEltTy) {
+ // Storing to an integer field of this size, just do it.
+ } else if (ArrayEltTy->isFloatingPointTy() ||
+ ArrayEltTy->isVectorTy()) {
+ // Bitcast to the right element type (for fp/vector values).
+ EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
+ } else {
+ // Otherwise, bitcast the dest pointer (for aggregates).
+ DestField = new BitCastInst(DestField,
+ PointerType::getUnqual(EltVal->getType()),
+ "", SI);
+ }
+ new StoreInst(EltVal, DestField, SI);
+
+ if (TD->isBigEndian())
+ Shift -= ElementOffset;
+ else
+ Shift += ElementOffset;
+ }
+ }
+
+ DeadInsts.push_back(SI);
+}
+
+/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
+/// an integer. Load the individual pieces to form the aggregate value.
+void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ // Extract each element out of the NewElts according to its structure offset
+ // and form the result value.
+ const Type *AllocaEltTy = AI->getAllocatedType();
+ uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+
+ DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
+ << '\n');
+
+ // There are two forms here: AI could be an array or struct. Both cases
+ // have different ways to compute the element offset.
+ const StructLayout *Layout = 0;
+ uint64_t ArrayEltBitOffset = 0;
+ if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ Layout = TD->getStructLayout(EltSTy);
+ } else {
+ const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
+ ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
+ }
+
+ Value *ResultVal =
+ Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Load the value from the alloca. If the NewElt is an aggregate, cast
+ // the pointer to an integer of the same size before doing the load.
+ Value *SrcField = NewElts[i];
+ const Type *FieldTy =
+ cast<PointerType>(SrcField->getType())->getElementType();
+ uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+
+ // Ignore zero sized fields like {}, they obviously contain no data.
+ if (FieldSizeBits == 0) continue;
+
+ const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
+ FieldSizeBits);
+ if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
+ !FieldTy->isVectorTy())
+ SrcField = new BitCastInst(SrcField,
+ PointerType::getUnqual(FieldIntTy),
+ "", LI);
+ SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);
+
+ // If SrcField is a fp or vector of the right size but that isn't an
+ // integer type, bitcast to an integer so we can shift it.
+ if (SrcField->getType() != FieldIntTy)
+ SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);
+
+ // Zero extend the field to be the same size as the final alloca so that
+ // we can shift and insert it.
+ if (SrcField->getType() != ResultVal->getType())
+ SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
+
+ // Determine the number of bits to shift SrcField.
+ uint64_t Shift;
+ if (Layout) // Struct case.
+ Shift = Layout->getElementOffsetInBits(i);
+ else // Array case.
+ Shift = i*ArrayEltBitOffset;
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
+
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
+ SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
+ }
+
+ // Don't create an 'or x, 0' on the first iteration.
+ if (!isa<Constant>(ResultVal) ||
+ !cast<Constant>(ResultVal)->isNullValue())
+ ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+ else
+ ResultVal = SrcField;
+ }
+
+ // Handle tail padding by truncating the result
+ if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
+ ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
+
+ LI->replaceAllUsesWith(ResultVal);
+ DeadInsts.push_back(LI);
+}
+
+/// HasPadding - Return true if the specified type has any structure or
+/// alignment padding, false otherwise.
+static bool HasPadding(const Type *Ty, const TargetData &TD) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+ return HasPadding(ATy->getElementType(), TD);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return HasPadding(VTy->getElementType(), TD);
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned PrevFieldBitOffset = 0;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
+
+ // Padding in sub-elements?
+ if (HasPadding(STy->getElementType(i), TD))
+ return true;
+
+ // Check to see if there is any padding between this element and the
+ // previous one.
+ if (i) {
+ unsigned PrevFieldEnd =
+ PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
+ if (PrevFieldEnd < FieldBitOffset)
+ return true;
+ }
+
+ PrevFieldBitOffset = FieldBitOffset;
+ }
+
+ // Check for tail padding.
+ if (unsigned EltCount = STy->getNumElements()) {
+ unsigned PrevFieldEnd = PrevFieldBitOffset +
+ TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
+ if (PrevFieldEnd < SL->getSizeInBits())
+ return true;
+ }
+ }
+
+ return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+}
+
+/// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of
+/// an aggregate can be broken down into elements. Return 0 if not, 3 if safe,
+/// or 1 if safe after canonicalization has been performed.
+bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
+ // Loop over the use list of the alloca. We can only transform it if all of
+ // the users are safe to transform.
+ AllocaInfo Info;
+
+ isSafeForScalarRepl(AI, AI, 0, Info);
+ if (Info.isUnsafe) {
+ DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
+ return false;
+ }
+
+ // Okay, we know all the users are promotable. If the aggregate is a memcpy
+ // source and destination, we have to be careful. In particular, the memcpy
+ // could be moving around elements that live in structure padding of the LLVM
+ // types, but may actually be used. In these cases, we refuse to promote the
+ // struct.
+ if (Info.isMemCpySrc && Info.isMemCpyDst &&
+ HasPadding(AI->getAllocatedType(), *TD))
+ return false;
+
+ return true;
+}
+
+
+
+/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
+/// some part of a constant global variable. This intentionally only accepts
+/// constant expressions because we don't can't rewrite arbitrary instructions.
+static bool PointsToConstantGlobal(Value *V) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return GV->isConstant();
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr)
+ return PointsToConstantGlobal(CE->getOperand(0));
+ return false;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
+/// pointer to an alloca. Ignore any reads of the pointer, return false if we
+/// see any stores or other unknown uses. If we see pointer arithmetic, keep
+/// track of whether it moves the pointer (with isOffset) but otherwise traverse
+/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
+/// the alloca, and if the source pointer is a pointer to a constant global, we
+/// can optimize this.
+static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
+ bool isOffset) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ User *U = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U))
+ // Ignore non-volatile loads, they are always ok.
+ if (!LI->isVolatile())
+ continue;
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ // If uses of the bitcast are ok, we are ok.
+ if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
+ return false;
+ continue;
+ }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // If the GEP has all zero indices, it doesn't offset the pointer. If it
+ // doesn't, it does.
+ if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
+ isOffset || !GEP->hasAllZeroIndices()))
+ return false;
+ continue;
+ }
+
+ // If this is isn't our memcpy/memmove, reject it as something we can't
+ // handle.
+ MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
+ if (MI == 0)
+ return false;
+
+ // If we already have seen a copy, reject the second one.
+ if (TheCopy) return false;
+
+ // If the pointer has been offset from the start of the alloca, we can't
+ // safely handle this.
+ if (isOffset) return false;
+
+ // If the memintrinsic isn't using the alloca as the dest, reject it.
+ if (UI.getOperandNo() != 0) return false;
+
+ // If the source of the memcpy/move is not a constant global, reject it.
+ if (!PointsToConstantGlobal(MI->getSource()))
+ return false;
+
+ // Otherwise, the transform is safe. Remember the copy instruction.
+ TheCopy = MI;
+ }
+ return true;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
+/// modified by a copy from a constant global. If we can prove this, we can
+/// replace any uses of the alloca with uses of the global directly.
+MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
+ MemTransferInst *TheCopy = 0;
+ if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
+ return TheCopy;
+ return 0;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
new file mode 100644
index 0000000..360749c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -0,0 +1,327 @@
+//===- SimplifyCFGPass.cpp - CFG Simplification Pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead code elimination and basic block merging, along
+// with a collection of other peephole control flow optimizations. For example:
+//
+// * Removes basic blocks with no predecessors.
+// * Merges a basic block into its predecessor if there is only one and the
+// predecessor only has one successor.
+// * Eliminates PHI nodes for basic blocks with a single predecessor.
+// * Eliminates a basic block that only contains an unconditional branch.
+// * Changes invoke instructions to nounwind functions to be calls.
+// * Change things like "if (x) if (y)" into "if (x&y)".
+// * etc..
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Attributes.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimpl, "Number of blocks simplified");
+
+namespace {
+ struct CFGSimplifyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGSimplifyPass() : FunctionPass(ID) {}
+
+ virtual bool runOnFunction(Function &F);
+ };
+}
+
+char CFGSimplifyPass::ID = 0;
+INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg",
+ "Simplify the CFG", false, false);
+
+// Public interface to the CFGSimplification pass
+FunctionPass *llvm::createCFGSimplificationPass() {
+ return new CFGSimplifyPass();
+}
+
+/// ChangeToUnreachable - Insert an unreachable instruction before the specified
+/// instruction, making it and the rest of the code in the block dead.
+static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+ BasicBlock *BB = I->getParent();
+ // Loop over all of the successors, removing BB's entry from any PHI
+ // nodes.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ (*SI)->removePredecessor(BB);
+
+ // Insert a call to llvm.trap right before this. This turns the undefined
+ // behavior into a hard fail instead of falling through into random code.
+ if (UseLLVMTrap) {
+ Function *TrapFn =
+ Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
+ CallInst::Create(TrapFn, "", I);
+ }
+ new UnreachableInst(I->getContext(), I);
+
+ // All instructions after this are dead.
+ BasicBlock::iterator BBI = I, BBE = BB->end();
+ while (BBI != BBE) {
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BB->getInstList().erase(BBI++);
+ }
+}
+
+/// ChangeToCall - Convert the specified invoke into a normal call.
+static void ChangeToCall(InvokeInst *II) {
+ BasicBlock *BB = II->getParent();
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args.begin(),
+ Args.end(), "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ II->replaceAllUsesWith(NewCall);
+
+ // Follow the call by a branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Update PHI nodes in the unwind destination
+ II->getUnwindDest()->removePredecessor(BB);
+ BB->getInstList().erase(II);
+}
+
+static bool MarkAliveBlocks(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 128> &Reachable) {
+
+ SmallVector<BasicBlock*, 128> Worklist;
+ Worklist.push_back(BB);
+ bool Changed = false;
+ do {
+ BB = Worklist.pop_back_val();
+
+ if (!Reachable.insert(BB))
+ continue;
+
+ // Do a quick scan of the basic block, turning any obviously unreachable
+ // instructions into LLVM unreachable insts. The instruction combining pass
+ // canonicalizes unreachable insts into stores to null or undef.
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
+ if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
+ if (CI->doesNotReturn()) {
+ // If we found a call to a no-return function, insert an unreachable
+ // instruction after it. Make sure there isn't *already* one there
+ // though.
+ ++BBI;
+ if (!isa<UnreachableInst>(BBI)) {
+ // Don't insert a call to llvm.trap right before the unreachable.
+ ChangeToUnreachable(BBI, false);
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ // Store to undef and store to null are undefined and used to signal that
+ // they should be changed to unreachable by passes that can't modify the
+ // CFG.
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ // Don't touch volatile stores.
+ if (SI->isVolatile()) continue;
+
+ Value *Ptr = SI->getOperand(1);
+
+ if (isa<UndefValue>(Ptr) ||
+ (isa<ConstantPointerNull>(Ptr) &&
+ SI->getPointerAddressSpace() == 0)) {
+ ChangeToUnreachable(SI, true);
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ // Turn invokes that call 'nounwind' functions into ordinary calls.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ if (II->doesNotThrow()) {
+ ChangeToCall(II);
+ Changed = true;
+ }
+
+ Changed |= ConstantFoldTerminator(BB);
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ Worklist.push_back(*SI);
+ } while (!Worklist.empty());
+ return Changed;
+}
+
+/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// if they are in a dead cycle. Return true if a change was made, false
+/// otherwise.
+static bool RemoveUnreachableBlocksFromFn(Function &F) {
+ SmallPtrSet<BasicBlock*, 128> Reachable;
+ bool Changed = MarkAliveBlocks(F.begin(), Reachable);
+
+ // If there are unreachable blocks in the CFG...
+ if (Reachable.size() == F.size())
+ return Changed;
+
+ assert(Reachable.size() < F.size());
+ NumSimpl += F.size()-Reachable.size();
+
+ // Loop over all of the basic blocks that are not reachable, dropping all of
+ // their internal references...
+ for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Reachable.count(BB))
+ continue;
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (Reachable.count(*SI))
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ for (Function::iterator I = ++F.begin(); I != F.end();)
+ if (!Reachable.count(I))
+ I = F.getBasicBlockList().erase(I);
+ else
+ ++I;
+
+ return true;
+}
+
+/// MergeEmptyReturnBlocks - If we have more than one empty (other than phi
+/// node) return blocks, merge them together to promote recursive block merging.
+static bool MergeEmptyReturnBlocks(Function &F) {
+ bool Changed = false;
+
+ BasicBlock *RetBlock = 0;
+
+ // Scan all the blocks in the function, looking for empty return blocks.
+ for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
+ BasicBlock &BB = *BBI++;
+
+ // Only look at return blocks.
+ ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
+ if (Ret == 0) continue;
+
+ // Only look at the block if it is empty or the only other thing in it is a
+ // single PHI node that is the operand to the return.
+ if (Ret != &BB.front()) {
+ // Check for something else in the block.
+ BasicBlock::iterator I = Ret;
+ --I;
+ // Skip over debug info.
+ while (isa<DbgInfoIntrinsic>(I) && I != BB.begin())
+ --I;
+ if (!isa<DbgInfoIntrinsic>(I) &&
+ (!isa<PHINode>(I) || I != BB.begin() ||
+ Ret->getNumOperands() == 0 ||
+ Ret->getOperand(0) != I))
+ continue;
+ }
+
+ // If this is the first returning block, remember it and keep going.
+ if (RetBlock == 0) {
+ RetBlock = &BB;
+ continue;
+ }
+
+ // Otherwise, we found a duplicate return block. Merge the two.
+ Changed = true;
+
+ // Case when there is no input to the return or when the returned values
+ // agree is trivial. Note that they can't agree if there are phis in the
+ // blocks.
+ if (Ret->getNumOperands() == 0 ||
+ Ret->getOperand(0) ==
+ cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
+ BB.replaceAllUsesWith(RetBlock);
+ BB.eraseFromParent();
+ continue;
+ }
+
+ // If the canonical return block has no PHI node, create one now.
+ PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
+ if (RetBlockPHI == 0) {
+ Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
+ RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), "merge",
+ &RetBlock->front());
+
+ for (pred_iterator PI = pred_begin(RetBlock), E = pred_end(RetBlock);
+ PI != E; ++PI)
+ RetBlockPHI->addIncoming(InVal, *PI);
+ RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
+ }
+
+ // Turn BB into a block that just unconditionally branches to the return
+ // block. This handles the case when the two return blocks have a common
+ // predecessor but that return different things.
+ RetBlockPHI->addIncoming(Ret->getOperand(0), &BB);
+ BB.getTerminator()->eraseFromParent();
+ BranchInst::Create(RetBlock, &BB);
+ }
+
+ return Changed;
+}
+
+/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// iterating until no more changes are made.
+static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
+ bool Changed = false;
+ bool LocalChange = true;
+ while (LocalChange) {
+ LocalChange = false;
+
+ // Loop over all of the basic blocks and remove them if they are unneeded...
+ //
+ for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
+ if (SimplifyCFG(BBIt++, TD)) {
+ LocalChange = true;
+ ++NumSimpl;
+ }
+ }
+ Changed |= LocalChange;
+ }
+ return Changed;
+}
+
+// It is possible that we may require multiple passes over the code to fully
+// simplify the CFG.
+//
+bool CFGSimplifyPass::runOnFunction(Function &F) {
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ bool EverChanged = RemoveUnreachableBlocksFromFn(F);
+ EverChanged |= MergeEmptyReturnBlocks(F);
+ EverChanged |= IterativeSimplifyCFG(F, TD);
+
+ // If neither pass changed anything, we're done.
+ if (!EverChanged) return false;
+
+ // IterativeSimplifyCFG can (rarely) make some loops dead. If this happens,
+ // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should
+ // iterate between the two optimizations. We structure the code like this to
+ // avoid reruning IterativeSimplifyCFG if the second pass of
+ // RemoveUnreachableBlocksFromFn doesn't do anything.
+ if (!RemoveUnreachableBlocksFromFn(F))
+ return true;
+
+ do {
+ EverChanged = IterativeSimplifyCFG(F, TD);
+ EverChanged |= RemoveUnreachableBlocksFromFn(F);
+ } while (EverChanged);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
new file mode 100644
index 0000000..3ec70ec
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -0,0 +1,157 @@
+//===- SimplifyHalfPowrLibCalls.cpp - Optimize specific half_powr calls ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple pass that applies an experimental
+// transformation on calls to specific functions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplify-libcalls-halfpowr"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+ /// This pass optimizes well half_powr function calls.
+ ///
+ class SimplifyHalfPowrLibCalls : public FunctionPass {
+ const TargetData *TD;
+ public:
+ static char ID; // Pass identification
+ SimplifyHalfPowrLibCalls() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ }
+
+ Instruction *
+ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
+ Instruction *InsertPt);
+ };
+ char SimplifyHalfPowrLibCalls::ID = 0;
+} // end anonymous namespace.
+
+INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr",
+ "Simplify half_powr library calls", false, false);
+
+// Public interface to the Simplify HalfPowr LibCalls pass.
+FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
+ return new SimplifyHalfPowrLibCalls();
+}
+
+/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging
+/// their control flow to better facilitate subsequent optimization.
+Instruction *
+SimplifyHalfPowrLibCalls::
+InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
+ Instruction *InsertPt) {
+ std::vector<BasicBlock *> Bodies;
+ BasicBlock *NewBlock = 0;
+
+ for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) {
+ CallInst *Call = cast<CallInst>(HalfPowrs[i]);
+ Function *Callee = Call->getCalledFunction();
+
+ // Minimally sanity-check the CFG of half_powr to ensure that it contains
+ // the kind of code we expect. If we're running this pass, we have
+ // reason to believe it will be what we expect.
+ Function::iterator I = Callee->begin();
+ BasicBlock *Prologue = I++;
+ if (I == Callee->end()) break;
+ BasicBlock *SubnormalHandling = I++;
+ if (I == Callee->end()) break;
+ BasicBlock *Body = I++;
+ if (I != Callee->end()) break;
+ if (SubnormalHandling->getSinglePredecessor() != Prologue)
+ break;
+ BranchInst *PBI = dyn_cast<BranchInst>(Prologue->getTerminator());
+ if (!PBI || !PBI->isConditional())
+ break;
+ BranchInst *SNBI = dyn_cast<BranchInst>(SubnormalHandling->getTerminator());
+ if (!SNBI || SNBI->isConditional())
+ break;
+ if (!isa<ReturnInst>(Body->getTerminator()))
+ break;
+
+ Instruction *NextInst = llvm::next(BasicBlock::iterator(Call));
+
+ // Inline the call, taking care of what code ends up where.
+ NewBlock = SplitBlock(NextInst->getParent(), NextInst, this);
+
+ InlineFunctionInfo IFI(0, TD);
+ bool B = InlineFunction(Call, IFI);
+ assert(B && "half_powr didn't inline?"); B=B;
+
+ BasicBlock *NewBody = NewBlock->getSinglePredecessor();
+ assert(NewBody);
+ Bodies.push_back(NewBody);
+ }
+
+ if (!NewBlock)
+ return InsertPt;
+
+ // Put the code for all the bodies into one block, to facilitate
+ // subsequent optimization.
+ (void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this);
+ for (unsigned i = 0, e = Bodies.size(); i != e; ++i) {
+ BasicBlock *Body = Bodies[i];
+ Instruction *FNP = Body->getFirstNonPHI();
+ // Splice the insts from body into NewBlock.
+ NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(),
+ FNP, Body->getTerminator());
+ }
+
+ return NewBlock->begin();
+}
+
+/// runOnFunction - Top level algorithm.
+///
+bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ bool Changed = false;
+ std::vector<Instruction *> HalfPowrs;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Look for calls.
+ bool IsHalfPowr = false;
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ // Look for direct calls and calls to non-external functions.
+ Function *Callee = CI->getCalledFunction();
+ if (Callee && Callee->hasExternalLinkage()) {
+ // Look for calls with well-known names.
+ if (Callee->getName() == "__half_powrf4")
+ IsHalfPowr = true;
+ }
+ }
+ if (IsHalfPowr)
+ HalfPowrs.push_back(I);
+ // We're looking for sequences of up to three such calls, which we'll
+ // simplify as a group.
+ if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) {
+ I = InlineHalfPowrs(HalfPowrs, I);
+ E = I->getParent()->end();
+ HalfPowrs.clear();
+ Changed = true;
+ }
+ }
+ assert(HalfPowrs.empty() && "Block had no terminator!");
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
new file mode 100644
index 0000000..d7ce53f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -0,0 +1,2201 @@
+//===- SimplifyLibCalls.cpp - Optimize specific well-known library calls --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple pass that applies a variety of small
+// optimizations for calls to specific well-known function calls (e.g. runtime
+// library functions). Any optimization that takes the very simple form
+// "replace call to library function with simpler code that provides the same
+// result" belongs in this file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplify-libcalls"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of library calls simplified");
+STATISTIC(NumAnnotated, "Number of attributes added to library functions");
+
+//===----------------------------------------------------------------------===//
+// Optimizer Base Class
+//===----------------------------------------------------------------------===//
+
+/// This class is the abstract base class for the set of optimizations that
+/// corresponds to one library call.
+namespace {
+class LibCallOptimization {
+protected:
+ Function *Caller;
+ const TargetData *TD;
+ LLVMContext* Context;
+public:
+ LibCallOptimization() { }
+ virtual ~LibCallOptimization() {}
+
+ /// CallOptimizer - This pure virtual method is implemented by base classes to
+ /// do various optimizations. If this returns null then no transformation was
+ /// performed. If it returns CI, then it transformed the call and CI is to be
+ /// deleted. If it returns something else, replace CI with the new value and
+ /// delete CI.
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
+ =0;
+
+ Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) {
+ Caller = CI->getParent()->getParent();
+ this->TD = TD;
+ if (CI->getCalledFunction())
+ Context = &CI->getCalledFunction()->getContext();
+
+ // We never change the calling convention.
+ if (CI->getCallingConv() != llvm::CallingConv::C)
+ return NULL;
+
+ return CallOptimizer(CI->getCalledFunction(), CI, B);
+ }
+};
+} // End anonymous namespace.
+
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
+/// value is equal or not-equal to zero.
+static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality())
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+/// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality
+/// comparisons with With.
+static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality() && IC->getOperand(1) == With)
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// String and Memory LibCall Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'strcat' Optimizations
+namespace {
+struct StrCatOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcat" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+ --Len; // Unbias length.
+
+ // Handle the simple, do-nothing case: strcat(x, "") -> x
+ if (Len == 0)
+ return Dst;
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ EmitStrLenMemCpy(Src, Dst, Len, B);
+ return Dst;
+ }
+
+ void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
+ // We need to find the end of the destination string. That's where the
+ // memory is to be moved to. We just generate a call to strlen.
+ Value *DstLen = EmitStrLen(Dst, B, TD);
+
+ // Now that we have the destination's length, we must index into the
+ // destination's pointer to get the actual memcpy destination (end of
+ // the string .. we're concatenating).
+ Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+
+ // We have enough information to now generate the memcpy call to do the
+ // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
+ EmitMemCpy(CpyDst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len+1),
+ 1, false, B, TD);
+ }
+};
+
+//===---------------------------------------===//
+// 'strncat' Optimizations
+
+struct StrNCatOpt : public StrCatOpt {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncat" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ uint64_t Len;
+
+ // We don't do anything if length is not constant
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Len = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0) return 0;
+ --SrcLen; // Unbias length.
+
+ // Handle the simple, do-nothing cases:
+ // strncat(x, "", c) -> x
+ // strncat(x, c, 0) -> x
+ if (SrcLen == 0 || Len == 0) return Dst;
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ // We don't optimize this case
+ if (Len < SrcLen) return 0;
+
+ // strncat(x, s, c) -> strcat(x, s)
+ // s is constant so the strcat can be optimized further
+ EmitStrLenMemCpy(Src, Dst, SrcLen, B);
+ return Dst;
+ }
+};
+
+//===---------------------------------------===//
+// 'strchr' Optimizations
+
+struct StrChrOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strchr" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != FT->getReturnType())
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+
+ // If the second operand is non-constant, see if we can compute the length
+ // of the input string and turn this into memchr.
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (CharC == 0) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ uint64_t Len = GetStringLength(SrcStr);
+ if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
+ return 0;
+
+ return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ B, TD);
+ }
+
+ // Otherwise, the character is a constant, see if the first argument is
+ // a string literal. If so, we can constant fold.
+ std::string Str;
+ if (!GetConstantStringInfo(SrcStr, Str))
+ return 0;
+
+ // strchr can find the nul character.
+ Str += '\0';
+ char CharValue = CharC->getSExtValue();
+
+ // Compute the offset.
+ uint64_t i = 0;
+ while (1) {
+ if (i == Str.size()) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
+ // Did we find our match?
+ if (Str[i] == CharValue)
+ break;
+ ++i;
+ }
+
+ // strchr(s+n,c) -> gep(s+n+i,c)
+ Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), i);
+ return B.CreateGEP(SrcStr, Idx, "strchr");
+ }
+};
+
+//===---------------------------------------===//
+// 'strcmp' Optimizations
+
+struct StrCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcmp" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(*Context))
+ return 0;
+
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strcmp(x,x) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ std::string Str1, Str2;
+ bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x
+ return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ // strcmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(),
+ strcmp(Str1.c_str(),Str2.c_str()));
+
+ // strcmp(P, "x") -> memcmp(P, "x", 2)
+ uint64_t Len1 = GetStringLength(Str1P);
+ uint64_t Len2 = GetStringLength(Str2P);
+ if (Len1 && Len2) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ return EmitMemCmp(Str1P, Str2P,
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ std::min(Len1, Len2)), B, TD);
+ }
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strncmp' Optimizations
+
+struct StrNCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strncmp" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strncmp(x,x,n) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ // Get the length argument if it is constant.
+ uint64_t Length;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Length = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Length == 0) // strncmp(x,y,0) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD);
+
+ std::string Str1, Str2;
+ bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+
+ if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ // strncmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(),
+ strncmp(Str1.c_str(), Str2.c_str(), Length));
+ return 0;
+ }
+};
+
+
+//===---------------------------------------===//
+// 'strcpy' Optimizations
+
+struct StrCpyOpt : public LibCallOptimization {
+ bool OptChkCall; // True if it's optimizing a __strcpy_chk libcall.
+
+ StrCpyOpt(bool c) : OptChkCall(c) {}
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcpy" function prototype.
+ unsigned NumParams = OptChkCall ? 3 : 2;
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != NumParams ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(*Context))
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // strcpy(x,x) -> x
+ return Src;
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+ // We have enough information to now generate the memcpy call to do the
+ // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
+ if (OptChkCall)
+ EmitMemCpyChk(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ CI->getArgOperand(2), B, TD);
+ else
+ EmitMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ 1, false, B, TD);
+ return Dst;
+ }
+};
+
+//===---------------------------------------===//
+// 'strncpy' Optimizations
+
+struct StrNCpyOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ !FT->getParamType(2)->isIntegerTy())
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ Value *LenOp = CI->getArgOperand(2);
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0) return 0;
+ --SrcLen;
+
+ if (SrcLen == 0) {
+ // strncpy(x, "", y) -> memset(x, '\0', y, 1)
+ EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'),
+ LenOp, false, B, TD);
+ return Dst;
+ }
+
+ uint64_t Len;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
+ Len = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ // Let strncpy handle the zero padding
+ if (Len > SrcLen+1) return 0;
+
+ // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
+ EmitMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ 1, false, B, TD);
+
+ return Dst;
+ }
+};
+
+//===---------------------------------------===//
+// 'strlen' Optimizations
+
+struct StrLenOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 ||
+ FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ Value *Src = CI->getArgOperand(0);
+
+ // Constant folding: strlen("xyz") -> 3
+ if (uint64_t Len = GetStringLength(Src))
+ return ConstantInt::get(CI->getType(), Len-1);
+
+ // strlen(x) != 0 --> *x != 0
+ // strlen(x) == 0 --> *x == 0
+ if (IsOnlyUsedInZeroEqualityComparison(CI))
+ return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc.
+
+struct StrToOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy())
+ return 0;
+
+ Value *EndPtr = CI->getArgOperand(1);
+ if (isa<ConstantPointerNull>(EndPtr)) {
+ CI->setOnlyReadsMemory();
+ CI->addAttribute(1, Attribute::NoCapture);
+ }
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strstr' Optimizations
+
+struct StrStrOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isPointerTy())
+ return 0;
+
+ // fold strstr(x, x) -> x.
+ if (CI->getArgOperand(0) == CI->getArgOperand(1))
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
+ if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD);
+ Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
+ StrLen, B, TD);
+ for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
+ UI != UE; ) {
+ ICmpInst *Old = cast<ICmpInst>(*UI++);
+ Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
+ ConstantInt::getNullValue(StrNCmp->getType()),
+ "cmp");
+ Old->replaceAllUsesWith(Cmp);
+ Old->eraseFromParent();
+ }
+ return CI;
+ }
+
+ // See if either input string is a constant string.
+ std::string SearchStr, ToFindStr;
+ bool HasStr1 = GetConstantStringInfo(CI->getArgOperand(0), SearchStr);
+ bool HasStr2 = GetConstantStringInfo(CI->getArgOperand(1), ToFindStr);
+
+ // fold strstr(x, "") -> x.
+ if (HasStr2 && ToFindStr.empty())
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // If both strings are known, constant fold it.
+ if (HasStr1 && HasStr2) {
+ std::string::size_type Offset = SearchStr.find(ToFindStr);
+
+ if (Offset == std::string::npos) // strstr("foo", "bar") -> null
+ return Constant::getNullValue(CI->getType());
+
+ // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
+ Value *Result = CastToCStr(CI->getArgOperand(0), B);
+ Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
+ return B.CreateBitCast(Result, CI->getType());
+ }
+
+ // fold strstr(x, "y") -> strchr(x, 'y').
+ if (HasStr2 && ToFindStr.size() == 1)
+ return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0),
+ ToFindStr[0], B, TD), CI->getType());
+ return 0;
+ }
+};
+
+
+//===---------------------------------------===//
+// 'memcmp' Optimizations
+
+struct MemCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy(32))
+ return 0;
+
+ Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
+
+ if (LHS == RHS) // memcmp(s,s,x) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // Make sure we have a constant length.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!LenC) return 0;
+ uint64_t Len = LenC->getZExtValue();
+
+ if (Len == 0) // memcmp(s1,s2,0) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
+ if (Len == 1) {
+ Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"),
+ CI->getType(), "lhsv");
+ Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"),
+ CI->getType(), "rhsv");
+ return B.CreateSub(LHSV, RHSV, "chardiff");
+ }
+
+ // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
+ std::string LHSStr, RHSStr;
+ if (GetConstantStringInfo(LHS, LHSStr) &&
+ GetConstantStringInfo(RHS, RHSStr)) {
+ // Make sure we're not reading out-of-bounds memory.
+ if (Len > LHSStr.length() || Len > RHSStr.length())
+ return 0;
+ uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ return ConstantInt::get(CI->getType(), Ret);
+ }
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'memcpy' Optimizations
+
+struct MemCpyOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1, false, B, TD);
+ return CI->getArgOperand(0);
+ }
+};
+
+//===---------------------------------------===//
+// 'memmove' Optimizations
+
+struct MemMoveOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
+ EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1, false, B, TD);
+ return CI->getArgOperand(0);
+ }
+};
+
+//===---------------------------------------===//
+// 'memset' Optimizations
+
+struct MemSetOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(*Context))
+ return 0;
+
+ // memset(p, v, n) -> llvm.memset(p, v, n, 1)
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1),
+ Type::getInt8Ty(*Context), false);
+ EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD);
+ return CI->getArgOperand(0);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Math Library Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'pow*' Optimizations
+
+struct PowOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 2 arguments of the same FP type, which match the
+ // result type.
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return 0;
+
+ Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
+ return Op1C;
+ if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x)
+ return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
+ }
+
+ ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
+ if (Op2C == 0) return 0;
+
+ if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
+ return ConstantFP::get(CI->getType(), 1.0);
+
+ if (Op2C->isExactlyValue(0.5)) {
+ // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+ // This is faster than calling pow, and still handles negative zero
+ // and negative infinite correctly.
+ // TODO: In fast-math mode, this could be just sqrt(x).
+ // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+ Value *Inf = ConstantFP::getInfinity(CI->getType());
+ Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+ Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B,
+ Callee->getAttributes());
+ Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
+ Callee->getAttributes());
+ Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf, "tmp");
+ Value *Sel = B.CreateSelect(FCmp, Inf, FAbs, "tmp");
+ return Sel;
+ }
+
+ if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
+ return Op1;
+ if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
+ return B.CreateFMul(Op1, Op1, "pow2");
+ if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
+ return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
+ Op1, "powrecip");
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'exp2' Optimizations
+
+struct Exp2Opt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return 0;
+
+ Value *Op = CI->getArgOperand(0);
+ // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
+ // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
+ Value *LdExpArg = 0;
+ if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+ LdExpArg = B.CreateSExt(OpC->getOperand(0),
+ Type::getInt32Ty(*Context), "tmp");
+ } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+ LdExpArg = B.CreateZExt(OpC->getOperand(0),
+ Type::getInt32Ty(*Context), "tmp");
+ }
+
+ if (LdExpArg) {
+ const char *Name;
+ if (Op->getType()->isFloatTy())
+ Name = "ldexpf";
+ else if (Op->getType()->isDoubleTy())
+ Name = "ldexp";
+ else
+ Name = "ldexpl";
+
+ Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
+ if (!Op->getType()->isFloatTy())
+ One = ConstantExpr::getFPExtend(One, Op->getType());
+
+ Module *M = Caller->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(),
+ Type::getInt32Ty(*Context),NULL);
+ CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+ }
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct UnaryDoubleFPOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+ !FT->getParamType(0)->isDoubleTy())
+ return 0;
+
+ // If this is something like 'floor((double)floatval)', convert to floorf.
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+ if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
+ return 0;
+
+ // floor((double)floatval) -> (double)floorf(floatval)
+ Value *V = Cast->getOperand(0);
+ V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B,
+ Callee->getAttributes());
+ return B.CreateFPExt(V, Type::getDoubleTy(*Context));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Integer Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'ffs*' Optimizations
+
+struct FFSOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 2 arguments of the same FP type, which match the
+ // result type.
+ if (FT->getNumParams() != 1 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ !FT->getParamType(0)->isIntegerTy())
+ return 0;
+
+ Value *Op = CI->getArgOperand(0);
+
+ // Constant fold.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ if (CI->getValue() == 0) // ffs(0) -> 0.
+ return Constant::getNullValue(CI->getType());
+ return ConstantInt::get(Type::getInt32Ty(*Context), // ffs(c) -> cttz(c)+1
+ CI->getValue().countTrailingZeros()+1);
+ }
+
+ // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
+ const Type *ArgType = Op->getType();
+ Value *F = Intrinsic::getDeclaration(Callee->getParent(),
+ Intrinsic::cttz, &ArgType, 1);
+ Value *V = B.CreateCall(F, Op, "cttz");
+ V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
+ V = B.CreateIntCast(V, Type::getInt32Ty(*Context), false, "tmp");
+
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
+ return B.CreateSelect(Cond, V,
+ ConstantInt::get(Type::getInt32Ty(*Context), 0));
+ }
+};
+
+//===---------------------------------------===//
+// 'isdigit' Optimizations
+
+struct IsDigitOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // isdigit(c) -> (c-'0') <u 10
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),
+ "isdigittmp");
+ Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),
+ "isdigit");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+//===---------------------------------------===//
+// 'isascii' Optimizations
+
+struct IsAsciiOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // isascii(c) -> c <u 128
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),
+ "isascii");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+//===---------------------------------------===//
+// 'abs', 'labs', 'llabs' Optimizations
+
+struct AbsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(integer) where the types agree.
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ FT->getParamType(0) != FT->getReturnType())
+ return 0;
+
+ // abs(x) -> x >s -1 ? x : -x
+ Value *Op = CI->getArgOperand(0);
+ Value *Pos = B.CreateICmpSGT(Op,
+ Constant::getAllOnesValue(Op->getType()),
+ "ispos");
+ Value *Neg = B.CreateNeg(Op, "neg");
+ return B.CreateSelect(Pos, Op, Neg);
+ }
+};
+
+
+//===---------------------------------------===//
+// 'toascii' Optimizations
+
+struct ToAsciiOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require i32(i32)
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // isascii(c) -> c & 0x7f
+ return B.CreateAnd(CI->getArgOperand(0),
+ ConstantInt::get(CI->getType(),0x7F));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Formatting and IO Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'printf' Optimizations
+
+struct PrintFOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ // Check for a fixed format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr))
+ return 0;
+
+ // Empty format string -> noop.
+ if (FormatStr.empty()) // Tolerate printf's declared void.
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), 0);
+
+ // printf("x") -> putchar('x'), even for '%'. Return the result of putchar
+ // in case there is an error writing to stdout.
+ if (FormatStr.size() == 1) {
+ Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context),
+ FormatStr[0]), B, TD);
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ // printf("foo\n") --> puts("foo")
+ if (FormatStr[FormatStr.size()-1] == '\n' &&
+ FormatStr.find('%') == std::string::npos) { // no format characters.
+ // Create a string literal with no \n on it. We expect the constant merge
+ // pass to be run after this pass, to merge duplicate strings.
+ FormatStr.erase(FormatStr.end()-1);
+ Constant *C = ConstantArray::get(*Context, FormatStr, true);
+ C = new GlobalVariable(*Callee->getParent(), C->getType(), true,
+ GlobalVariable::InternalLinkage, C, "str");
+ EmitPutS(C, B, TD);
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), FormatStr.size()+1);
+ }
+
+ // Optimize specific format strings.
+ // printf("%c", chr) --> putchar(chr)
+ if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isIntegerTy()) {
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD);
+
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ // printf("%s\n", str) --> puts(str)
+ if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isPointerTy() &&
+ CI->use_empty()) {
+ EmitPutS(CI->getArgOperand(1), B, TD);
+ return CI;
+ }
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'sprintf' Optimizations
+
+struct SPrintFOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed pointer arguments and an integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ // Check for a fixed format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ return 0;
+
+ // If we just have a format string (nothing else crazy) transform it.
+ if (CI->getNumArgOperands() == 2) {
+ // Make sure there's no % in the constant array. We could try to handle
+ // %% -> % in the future if we cared.
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%')
+ return 0; // we found a format specifier, bail out.
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the
+ ConstantInt::get(TD->getIntPtrType(*Context), // nul byte.
+ FormatStr.size() + 1), 1, false, B, TD);
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
+ return 0;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ Value *V = B.CreateTrunc(CI->getArgOperand(2),
+ Type::getInt8Ty(*Context), "char");
+ Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
+ B.CreateStore(V, Ptr);
+ Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
+ "nul");
+ B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr);
+
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
+
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD);
+ Value *IncLen = B.CreateAdd(Len,
+ ConstantInt::get(Len->getType(), 1),
+ "leninc");
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2),
+ IncLen, 1, false, B, TD);
+
+ // The sprintf result is the unincremented number of bytes in the string.
+ return B.CreateIntCast(Len, CI->getType(), false);
+ }
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'fwrite' Optimizations
+
+struct FWriteOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require a pointer, an integer, an integer, a pointer, returning integer.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ !FT->getParamType(3)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ // Get the element size and count.
+ ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!SizeC || !CountC) return 0;
+ uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
+
+ // If this is writing zero records, remove the call (it's a noop).
+ if (Bytes == 0)
+ return ConstantInt::get(CI->getType(), 0);
+
+ // If this is writing one byte, turn it into fputc.
+ if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
+ EmitFPutC(Char, CI->getArgOperand(3), B, TD);
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'fputs' Optimizations
+
+struct FPutsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ // Require two pointers. Also, we can't optimize if return value is used.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !CI->use_empty())
+ return 0;
+
+ // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+ uint64_t Len = GetStringLength(CI->getArgOperand(0));
+ if (!Len) return 0;
+ EmitFWrite(CI->getArgOperand(0),
+ ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
+ CI->getArgOperand(1), B, TD);
+ return CI; // Known to have no uses (see above).
+ }
+};
+
+//===---------------------------------------===//
+// 'fprintf' Optimizations
+
+struct FPrintFOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed paramters as pointers and integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ // All the optimizations depend on the format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ return 0;
+
+ // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
+ if (CI->getNumArgOperands() == 2) {
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
+ return 0; // We found a format specifier.
+
+ // These optimizations require TargetData.
+ if (!TD) return 0;
+
+ EmitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()),
+ CI->getArgOperand(0), B, TD);
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
+ return 0;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // fprintf(F, "%c", chr) --> fputc(chr, F)
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // fprintf(F, "%s", str) --> fputs(str, F)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+ return 0;
+ EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
+ return CI;
+ }
+ return 0;
+ }
+};
+
+} // end anonymous namespace.
+
+//===----------------------------------------------------------------------===//
+// SimplifyLibCalls Pass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// This pass optimizes well known library functions from libc and libm.
+ ///
+ class SimplifyLibCalls : public FunctionPass {
+ StringMap<LibCallOptimization*> Optimizations;
+ // String and Memory LibCall Optimizations
+ StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp;
+ StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
+ StrNCpyOpt StrNCpy; StrLenOpt StrLen;
+ StrToOpt StrTo; StrStrOpt StrStr;
+ MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
+ // Math Library Optimizations
+ PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
+ // Integer Optimizations
+ FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
+ ToAsciiOpt ToAscii;
+ // Formatting and IO Optimizations
+ SPrintFOpt SPrintF; PrintFOpt PrintF;
+ FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
+
+ bool Modified; // This is only used by doInitialization.
+ public:
+ static char ID; // Pass identification
+ SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {}
+ void InitOptimizations();
+ bool runOnFunction(Function &F);
+
+ void setDoesNotAccessMemory(Function &F);
+ void setOnlyReadsMemory(Function &F);
+ void setDoesNotThrow(Function &F);
+ void setDoesNotCapture(Function &F, unsigned n);
+ void setDoesNotAlias(Function &F, unsigned n);
+ bool doInitialization(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ }
+ };
+ char SimplifyLibCalls::ID = 0;
+} // end anonymous namespace.
+
+INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls",
+ "Simplify well-known library calls", false, false);
+
+// Public interface to the Simplify LibCalls pass.
+FunctionPass *llvm::createSimplifyLibCallsPass() {
+ return new SimplifyLibCalls();
+}
+
+/// Optimizations - Populate the Optimizations map with all the optimizations
+/// we know.
+void SimplifyLibCalls::InitOptimizations() {
+ // String and Memory LibCall Optimizations
+ Optimizations["strcat"] = &StrCat;
+ Optimizations["strncat"] = &StrNCat;
+ Optimizations["strchr"] = &StrChr;
+ Optimizations["strcmp"] = &StrCmp;
+ Optimizations["strncmp"] = &StrNCmp;
+ Optimizations["strcpy"] = &StrCpy;
+ Optimizations["strncpy"] = &StrNCpy;
+ Optimizations["strlen"] = &StrLen;
+ Optimizations["strtol"] = &StrTo;
+ Optimizations["strtod"] = &StrTo;
+ Optimizations["strtof"] = &StrTo;
+ Optimizations["strtoul"] = &StrTo;
+ Optimizations["strtoll"] = &StrTo;
+ Optimizations["strtold"] = &StrTo;
+ Optimizations["strtoull"] = &StrTo;
+ Optimizations["strstr"] = &StrStr;
+ Optimizations["memcmp"] = &MemCmp;
+ Optimizations["memcpy"] = &MemCpy;
+ Optimizations["memmove"] = &MemMove;
+ Optimizations["memset"] = &MemSet;
+
+ // _chk variants of String and Memory LibCall Optimizations.
+ Optimizations["__strcpy_chk"] = &StrCpyChk;
+
+ // Math Library Optimizations
+ Optimizations["powf"] = &Pow;
+ Optimizations["pow"] = &Pow;
+ Optimizations["powl"] = &Pow;
+ Optimizations["llvm.pow.f32"] = &Pow;
+ Optimizations["llvm.pow.f64"] = &Pow;
+ Optimizations["llvm.pow.f80"] = &Pow;
+ Optimizations["llvm.pow.f128"] = &Pow;
+ Optimizations["llvm.pow.ppcf128"] = &Pow;
+ Optimizations["exp2l"] = &Exp2;
+ Optimizations["exp2"] = &Exp2;
+ Optimizations["exp2f"] = &Exp2;
+ Optimizations["llvm.exp2.ppcf128"] = &Exp2;
+ Optimizations["llvm.exp2.f128"] = &Exp2;
+ Optimizations["llvm.exp2.f80"] = &Exp2;
+ Optimizations["llvm.exp2.f64"] = &Exp2;
+ Optimizations["llvm.exp2.f32"] = &Exp2;
+
+#ifdef HAVE_FLOORF
+ Optimizations["floor"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_CEILF
+ Optimizations["ceil"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_ROUNDF
+ Optimizations["round"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_RINTF
+ Optimizations["rint"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_NEARBYINTF
+ Optimizations["nearbyint"] = &UnaryDoubleFP;
+#endif
+
+ // Integer Optimizations
+ Optimizations["ffs"] = &FFS;
+ Optimizations["ffsl"] = &FFS;
+ Optimizations["ffsll"] = &FFS;
+ Optimizations["abs"] = &Abs;
+ Optimizations["labs"] = &Abs;
+ Optimizations["llabs"] = &Abs;
+ Optimizations["isdigit"] = &IsDigit;
+ Optimizations["isascii"] = &IsAscii;
+ Optimizations["toascii"] = &ToAscii;
+
+ // Formatting and IO Optimizations
+ Optimizations["sprintf"] = &SPrintF;
+ Optimizations["printf"] = &PrintF;
+ Optimizations["fwrite"] = &FWrite;
+ Optimizations["fputs"] = &FPuts;
+ Optimizations["fprintf"] = &FPrintF;
+}
+
+
+/// runOnFunction - Top level algorithm.
+///
+bool SimplifyLibCalls::runOnFunction(Function &F) {
+ if (Optimizations.empty())
+ InitOptimizations();
+
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+ IRBuilder<> Builder(F.getContext());
+
+ bool Changed = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ // Ignore non-calls.
+ CallInst *CI = dyn_cast<CallInst>(I++);
+ if (!CI) continue;
+
+ // Ignore indirect calls and calls to non-external functions.
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration() ||
+ !(Callee->hasExternalLinkage() || Callee->hasDLLImportLinkage()))
+ continue;
+
+ // Ignore unknown calls.
+ LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+ if (!LCO) continue;
+
+ // Set the builder to the instruction after the call.
+ Builder.SetInsertPoint(BB, I);
+
+ // Try to optimize this call.
+ Value *Result = LCO->OptimizeCall(CI, TD, Builder);
+ if (Result == 0) continue;
+
+ DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI;
+ dbgs() << " into: " << *Result << "\n");
+
+ // Something changed!
+ Changed = true;
+ ++NumSimplified;
+
+ // Inspect the instruction after the call (which was potentially just
+ // added) next.
+ I = CI; ++I;
+
+ if (CI != Result && !CI->use_empty()) {
+ CI->replaceAllUsesWith(Result);
+ if (!Result->hasName())
+ Result->takeName(CI);
+ }
+ CI->eraseFromParent();
+ }
+ }
+ return Changed;
+}
+
+// Utility methods for doInitialization.
+
+void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) {
+ if (!F.doesNotAccessMemory()) {
+ F.setDoesNotAccessMemory();
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setOnlyReadsMemory(Function &F) {
+ if (!F.onlyReadsMemory()) {
+ F.setOnlyReadsMemory();
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setDoesNotThrow(Function &F) {
+ if (!F.doesNotThrow()) {
+ F.setDoesNotThrow();
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) {
+ if (!F.doesNotCapture(n)) {
+ F.setDoesNotCapture(n);
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
+ if (!F.doesNotAlias(n)) {
+ F.setDoesNotAlias(n);
+ ++NumAnnotated;
+ Modified = true;
+ }
+}
+
+/// doInitialization - Add attributes to well-known functions.
+///
+bool SimplifyLibCalls::doInitialization(Module &M) {
+ Modified = false;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function &F = *I;
+ if (!F.isDeclaration())
+ continue;
+
+ if (!F.hasName())
+ continue;
+
+ const FunctionType *FTy = F.getFunctionType();
+
+ StringRef Name = F.getName();
+ switch (Name[0]) {
+ case 's':
+ if (Name == "strlen") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "strchr" ||
+ Name == "strrchr") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isIntegerTy())
+ continue;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ } else if (Name == "strcpy" ||
+ Name == "stpcpy" ||
+ Name == "strcat" ||
+ Name == "strtol" ||
+ Name == "strtod" ||
+ Name == "strtof" ||
+ Name == "strtoul" ||
+ Name == "strtoll" ||
+ Name == "strtold" ||
+ Name == "strncat" ||
+ Name == "strncpy" ||
+ Name == "strtoull") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strxfrm") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strcmp" ||
+ Name == "strspn" ||
+ Name == "strncmp" ||
+ Name == "strcspn" ||
+ Name == "strcoll" ||
+ Name == "strcasecmp" ||
+ Name == "strncasecmp") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strstr" ||
+ Name == "strpbrk") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strtok" ||
+ Name == "strtok_r") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "scanf" ||
+ Name == "setbuf" ||
+ Name == "setvbuf") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "strdup" ||
+ Name == "strndup") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "stat" ||
+ Name == "sscanf" ||
+ Name == "sprintf" ||
+ Name == "statvfs") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "snprintf") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ } else if (Name == "setitimer") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ setDoesNotCapture(F, 3);
+ } else if (Name == "system") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ // May throw; "system" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'm':
+ if (Name == "malloc") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "memcmp") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "memchr" ||
+ Name == "memrchr") {
+ if (FTy->getNumParams() != 3)
+ continue;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ } else if (Name == "modf" ||
+ Name == "modff" ||
+ Name == "modfl" ||
+ Name == "memcpy" ||
+ Name == "memccpy" ||
+ Name == "memmove") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "memalign") {
+ if (!FTy->getReturnType()->isPointerTy())
+ continue;
+ setDoesNotAlias(F, 0);
+ } else if (Name == "mkdir" ||
+ Name == "mktime") {
+ if (FTy->getNumParams() == 0 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'r':
+ if (Name == "realloc") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "read") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ // May throw; "read" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ } else if (Name == "rmdir" ||
+ Name == "rewind" ||
+ Name == "remove" ||
+ Name == "realpath") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "rename" ||
+ Name == "readlink") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'w':
+ if (Name == "write") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ // May throw; "write" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'b':
+ if (Name == "bcopy") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "bcmp") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "bzero") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'c':
+ if (Name == "calloc") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "chmod" ||
+ Name == "chown" ||
+ Name == "ctermid" ||
+ Name == "clearerr" ||
+ Name == "closedir") {
+ if (FTy->getNumParams() == 0 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'a':
+ if (Name == "atoi" ||
+ Name == "atol" ||
+ Name == "atof" ||
+ Name == "atoll") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "access") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'f':
+ if (Name == "fopen") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "fdopen") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "feof" ||
+ Name == "free" ||
+ Name == "fseek" ||
+ Name == "ftell" ||
+ Name == "fgetc" ||
+ Name == "fseeko" ||
+ Name == "ftello" ||
+ Name == "fileno" ||
+ Name == "fflush" ||
+ Name == "fclose" ||
+ Name == "fsetpos" ||
+ Name == "flockfile" ||
+ Name == "funlockfile" ||
+ Name == "ftrylockfile") {
+ if (FTy->getNumParams() == 0 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "ferror") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F);
+ } else if (Name == "fputc" ||
+ Name == "fstat" ||
+ Name == "frexp" ||
+ Name == "frexpf" ||
+ Name == "frexpl" ||
+ Name == "fstatvfs") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "fgets") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 3);
+ } else if (Name == "fread" ||
+ Name == "fwrite") {
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 4);
+ } else if (Name == "fputs" ||
+ Name == "fscanf" ||
+ Name == "fprintf" ||
+ Name == "fgetpos") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'g':
+ if (Name == "getc" ||
+ Name == "getlogin_r" ||
+ Name == "getc_unlocked") {
+ if (FTy->getNumParams() == 0 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "getenv") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "gets" ||
+ Name == "getchar") {
+ setDoesNotThrow(F);
+ } else if (Name == "getitimer") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "getpwnam") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'u':
+ if (Name == "ungetc") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "uname" ||
+ Name == "unlink" ||
+ Name == "unsetenv") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "utime" ||
+ Name == "utimes") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'p':
+ if (Name == "putc") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "puts" ||
+ Name == "printf" ||
+ Name == "perror") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "pread" ||
+ Name == "pwrite") {
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ // May throw; these are valid pthread cancellation points.
+ setDoesNotCapture(F, 2);
+ } else if (Name == "putchar") {
+ setDoesNotThrow(F);
+ } else if (Name == "popen") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "pclose") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'v':
+ if (Name == "vscanf") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "vsscanf" ||
+ Name == "vfscanf") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "valloc") {
+ if (!FTy->getReturnType()->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "vprintf") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "vfprintf" ||
+ Name == "vsprintf") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "vsnprintf") {
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ }
+ break;
+ case 'o':
+ if (Name == "open") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ } else if (Name == "opendir") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 't':
+ if (Name == "tmpfile") {
+ if (!FTy->getReturnType()->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "times") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'h':
+ if (Name == "htonl" ||
+ Name == "htons") {
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ }
+ break;
+ case 'n':
+ if (Name == "ntohl" ||
+ Name == "ntohs") {
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ }
+ break;
+ case 'l':
+ if (Name == "lstat") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "lchown") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'q':
+ if (Name == "qsort") {
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(3)->isPointerTy())
+ continue;
+ // May throw; places call through function pointer.
+ setDoesNotCapture(F, 4);
+ }
+ break;
+ case '_':
+ if (Name == "__strdup" ||
+ Name == "__strndup") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "__strtok_r") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "_IO_getc") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "_IO_putc") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 1:
+ if (Name == "\1__isoc99_scanf") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "\1stat64" ||
+ Name == "\1lstat64" ||
+ Name == "\1statvfs64" ||
+ Name == "\1__isoc99_sscanf") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "\1fopen64") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "\1fseeko64" ||
+ Name == "\1ftello64") {
+ if (FTy->getNumParams() == 0 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "\1tmpfile64") {
+ if (!FTy->getReturnType()->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "\1fstat64" ||
+ Name == "\1fstatvfs64") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ continue;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "\1open64") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy())
+ continue;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ }
+ }
+ return Modified;
+}
+
+// TODO:
+// Additional cases that we need to add to this file:
+//
+// cbrt:
+// * cbrt(expN(X)) -> expN(x/3)
+// * cbrt(sqrt(x)) -> pow(x,1/6)
+// * cbrt(sqrt(x)) -> pow(x,1/9)
+//
+// cos, cosf, cosl:
+// * cos(-x) -> cos(x)
+//
+// exp, expf, expl:
+// * exp(log(x)) -> x
+//
+// log, logf, logl:
+// * log(exp(x)) -> x
+// * log(x**y) -> y*log(x)
+// * log(exp(y)) -> y*log(e)
+// * log(exp2(y)) -> y*log(2)
+// * log(exp10(y)) -> y*log(10)
+// * log(sqrt(x)) -> 0.5*log(x)
+// * log(pow(x,y)) -> y*log(x)
+//
+// lround, lroundf, lroundl:
+// * lround(cnst) -> cnst'
+//
+// pow, powf, powl:
+// * pow(exp(x),y) -> exp(x*y)
+// * pow(sqrt(x),y) -> pow(x,y*0.5)
+// * pow(pow(x,y),z)-> pow(x,y*z)
+//
+// puts:
+// * puts("") -> putchar('\n')
+//
+// round, roundf, roundl:
+// * round(cnst) -> cnst'
+//
+// signbit:
+// * signbit(cnst) -> cnst'
+// * signbit(nncst) -> 0 (if pstv is a non-negative constant)
+//
+// sqrt, sqrtf, sqrtl:
+// * sqrt(expN(x)) -> expN(x*0.5)
+// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
+// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
+//
+// stpcpy:
+// * stpcpy(str, "literal") ->
+// llvm.memcpy(str,"literal",strlen("literal")+1,1)
+// strrchr:
+// * strrchr(s,c) -> reverse_offset_of_in(c,s)
+// (if c is a constant integer and s is a constant string)
+// * strrchr(s1,0) -> strchr(s1,0)
+//
+// strpbrk:
+// * strpbrk(s,a) -> offset_in_for(s,a)
+// (if s and a are both constant strings)
+// * strpbrk(s,"") -> 0
+// * strpbrk(s,a) -> strchr(s,a[0]) (if a is constant string of length 1)
+//
+// strspn, strcspn:
+// * strspn(s,a) -> const_int (if both args are constant)
+// * strspn("",a) -> 0
+// * strspn(s,"") -> 0
+// * strcspn(s,a) -> const_int (if both args are constant)
+// * strcspn("",a) -> 0
+// * strcspn(s,"") -> strlen(a)
+//
+// tan, tanf, tanl:
+// * tan(atan(x)) -> x
+//
+// trunc, truncf, truncl:
+// * trunc(cnst) -> cnst'
+//
+//
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
new file mode 100644
index 0000000..95d3ded
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -0,0 +1,266 @@
+//===-- Sink.cpp - Code Sinking -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks, when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sink"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumSunk, "Number of instructions sunk");
+
+namespace {
+ class Sinking : public FunctionPass {
+ DominatorTree *DT;
+ LoopInfo *LI;
+ AliasAnalysis *AA;
+
+ public:
+ static char ID; // Pass identification
+ Sinking() : FunctionPass(ID) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ }
+ private:
+ bool ProcessBlock(BasicBlock &BB);
+ bool SinkInstruction(Instruction *I, SmallPtrSet<Instruction *, 8> &Stores);
+ bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const;
+ };
+} // end anonymous namespace
+
+char Sinking::ID = 0;
+INITIALIZE_PASS(Sinking, "sink", "Code sinking", false, false);
+
+FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified value
+/// occur in blocks dominated by the specified block.
+bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
+ BasicBlock *BB) const {
+ // Ignoring debug uses is necessary so debug info doesn't affect the code.
+ // This may leave a referencing dbg_value in the original block, before
+ // the definition of the vreg. Dwarf generator handles this although the
+ // user might not get the right info at runtime.
+ for (Value::use_iterator I = Inst->use_begin(),
+ E = Inst->use_end(); I != E; ++I) {
+ // Determine the block of the use.
+ Instruction *UseInst = cast<Instruction>(*I);
+ BasicBlock *UseBlock = UseInst->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ unsigned Num = PHINode::getIncomingValueNumForOperand(I.getOperandNo());
+ UseBlock = PN->getIncomingBlock(Num);
+ }
+ // Check that it dominates.
+ if (!DT->dominates(BB, UseBlock))
+ return false;
+ }
+ return true;
+}
+
+bool Sinking::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTree>();
+ LI = &getAnalysis<LoopInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ bool EverMadeChange = false;
+
+ while (1) {
+ bool MadeChange = false;
+
+ // Process all basic blocks.
+ for (Function::iterator I = F.begin(), E = F.end();
+ I != E; ++I)
+ MadeChange |= ProcessBlock(*I);
+
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
+ }
+ return EverMadeChange;
+}
+
+bool Sinking::ProcessBlock(BasicBlock &BB) {
+ // Can't sink anything out of a block that has less than two successors.
+ if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false;
+
+ // Don't bother sinking code out of unreachable blocks. In addition to being
+ // unprofitable, it can also lead to infinite looping, because in an unreachable
+ // loop there may be nowhere to stop.
+ if (!DT->isReachableFromEntry(&BB)) return false;
+
+ bool MadeChange = false;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ BasicBlock::iterator I = BB.end();
+ --I;
+ bool ProcessedBegin = false;
+ SmallPtrSet<Instruction *, 8> Stores;
+ do {
+ Instruction *Inst = I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == BB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+ if (SinkInstruction(Inst, Stores))
+ ++NumSunk, MadeChange = true;
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ return MadeChange;
+}
+
+static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
+ SmallPtrSet<Instruction *, 8> &Stores) {
+ if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
+ if (L->isVolatile()) return false;
+
+ Value *Ptr = L->getPointerOperand();
+ unsigned Size = AA->getTypeStoreSize(L->getType());
+ for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
+ E = Stores.end(); I != E; ++I)
+ if (AA->getModRefInfo(*I, Ptr, Size) & AliasAnalysis::Mod)
+ return false;
+ }
+
+ if (Inst->mayWriteToMemory()) {
+ Stores.insert(Inst);
+ return false;
+ }
+
+ return Inst->isSafeToSpeculativelyExecute();
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool Sinking::SinkInstruction(Instruction *Inst,
+ SmallPtrSet<Instruction *, 8> &Stores) {
+ // Check if it's safe to move the instruction.
+ if (!isSafeToMove(Inst, AA, Stores))
+ return false;
+
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ // Loop over all the operands of the specified instruction. If there is
+ // anything we can't handle, bail out.
+ BasicBlock *ParentBlock = Inst->getParent();
+
+ // SuccToSinkTo - This is the successor to sink this instruction to, once we
+ // decide.
+ BasicBlock *SuccToSinkTo = 0;
+
+ // FIXME: This picks a successor to sink into based on having one
+ // successor that dominates all the uses. However, there are cases where
+ // sinking can happen but where the sink point isn't a successor. For
+ // example:
+ // x = computation
+ // if () {} else {}
+ // use x
+ // the instruction could be sunk over the whole diamond for the
+ // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+ // after that.
+
+ // Instructions can only be sunk if all their uses are in blocks
+ // dominated by one of the successors.
+ // Look at all the successors and decide which one
+ // we should sink to.
+ for (succ_iterator SI = succ_begin(ParentBlock),
+ E = succ_end(ParentBlock); SI != E; ++SI) {
+ if (AllUsesDominatedByBlock(Inst, *SI)) {
+ SuccToSinkTo = *SI;
+ break;
+ }
+ }
+
+ // If we couldn't find a block to sink to, ignore this instruction.
+ if (SuccToSinkTo == 0)
+ return false;
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (Inst->getParent() == SuccToSinkTo)
+ return false;
+
+ DEBUG(dbgs() << "Sink instr " << *Inst);
+ DEBUG(dbgs() << "to block ";
+ WriteAsOperand(dbgs(), SuccToSinkTo, false));
+
+ // If the block has multiple predecessors, this would introduce computation on
+ // a path that it doesn't already exist. We could split the critical edge,
+ // but for now we just punt.
+ // FIXME: Split critical edges if not backedges.
+ if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) {
+ // We cannot sink a load across a critical edge - there may be stores in
+ // other code paths.
+ if (!Inst->isSafeToSpeculativelyExecute()) {
+ DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
+ return false;
+ }
+
+ // We don't want to sink across a critical edge if we don't dominate the
+ // successor. We could be introducing calculations to new code paths.
+ if (!DT->dominates(ParentBlock, SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
+ return false;
+ }
+
+ // Don't sink instructions into a loop.
+ if (LI->isLoopHeader(SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** PUNTING: Loop header found\n");
+ return false;
+ }
+
+ // Otherwise we are OK with sinking along a critical edge.
+ DEBUG(dbgs() << "Sinking along critical edge.\n");
+ }
+
+ // Determine where to insert into. Skip phi nodes.
+ BasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+ while (InsertPos != SuccToSinkTo->end() && isa<PHINode>(InsertPos))
+ ++InsertPos;
+
+ // Move the instruction.
+ Inst->moveBefore(InsertPos);
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp
new file mode 100644
index 0000000..2e437ac
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp
@@ -0,0 +1,371 @@
+//===- TailDuplication.cpp - Simplify CFG through tail duplication --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a limited form of tail duplication, intended to simplify
+// CFGs by removing some unconditional branches. This pass is necessary to
+// straighten out loops created by the C front-end, but also is capable of
+// making other code nicer. After this pass is run, the CFG simplify pass
+// should be run to clean up the mess.
+//
+// This pass could be enhanced in the future to use profile information to be
+// more aggressive.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailduplicate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constant.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumEliminated, "Number of unconditional branches eliminated");
+
+static cl::opt<unsigned>
+TailDupThreshold("taildup-threshold",
+ cl::desc("Max block size to tail duplicate"),
+ cl::init(1), cl::Hidden);
+
+namespace {
+ class TailDup : public FunctionPass {
+ bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ TailDup() : FunctionPass(ID) {}
+
+ private:
+ inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
+ inline void eliminateUnconditionalBranch(BranchInst *BI);
+ SmallPtrSet<BasicBlock*, 4> CycleDetector;
+ };
+}
+
+char TailDup::ID = 0;
+INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false);
+
+// Public interface to the Tail Duplication pass
+FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
+
+/// runOnFunction - Top level algorithm - Loop over each unconditional branch in
+/// the function, eliminating it if it looks attractive enough. CycleDetector
+/// prevents infinite loops by checking that we aren't redirecting a branch to
+/// a place it already pointed to earlier; see PR 2323.
+bool TailDup::runOnFunction(Function &F) {
+ bool Changed = false;
+ CycleDetector.clear();
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ if (shouldEliminateUnconditionalBranch(I->getTerminator(),
+ TailDupThreshold)) {
+ eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator()));
+ Changed = true;
+ } else {
+ ++I;
+ CycleDetector.clear();
+ }
+ }
+ return Changed;
+}
+
+/// shouldEliminateUnconditionalBranch - Return true if this branch looks
+/// attractive to eliminate. We eliminate the branch if the destination basic
+/// block has <= 5 instructions in it, not counting PHI nodes. In practice,
+/// since one of these is a terminator instruction, this means that we will add
+/// up to 4 instructions to the new block.
+///
+/// We don't count PHI nodes in the count since they will be removed when the
+/// contents of the block are copied over.
+///
+bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI,
+ unsigned Threshold) {
+ BranchInst *BI = dyn_cast<BranchInst>(TI);
+ if (!BI || !BI->isUnconditional()) return false; // Not an uncond branch!
+
+ BasicBlock *Dest = BI->getSuccessor(0);
+ if (Dest == BI->getParent()) return false; // Do not loop infinitely!
+
+ // Do not inline a block if we will just get another branch to the same block!
+ TerminatorInst *DTI = Dest->getTerminator();
+ if (BranchInst *DBI = dyn_cast<BranchInst>(DTI))
+ if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest)
+ return false; // Do not loop infinitely!
+
+ // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack,
+ // because doing so would require breaking critical edges. This should be
+ // fixed eventually.
+ if (!DTI->use_empty())
+ return false;
+
+ // Do not bother with blocks with only a single predecessor: simplify
+ // CFG will fold these two blocks together!
+ pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest);
+ ++PI;
+ if (PI == PE) return false; // Exactly one predecessor!
+
+ BasicBlock::iterator I = Dest->getFirstNonPHI();
+
+ for (unsigned Size = 0; I != Dest->end(); ++I) {
+ if (Size == Threshold) return false; // The block is too large.
+
+ // Don't tail duplicate call instructions. They are very large compared to
+ // other instructions.
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false;
+
+ // Also alloca and malloc.
+ if (isa<AllocaInst>(I)) return false;
+
+ // Some vector instructions can expand into a number of instructions.
+ if (isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<InsertElementInst>(I)) return false;
+
+ // Only count instructions that are not debugger intrinsics.
+ if (!isa<DbgInfoIntrinsic>(I)) ++Size;
+ }
+
+ // Do not tail duplicate a block that has thousands of successors into a block
+ // with a single successor if the block has many other predecessors. This can
+ // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in
+ // cases that have a large number of indirect gotos.
+ unsigned NumSuccs = DTI->getNumSuccessors();
+ if (NumSuccs > 8) {
+ unsigned TooMany = 128;
+ if (NumSuccs >= TooMany) return false;
+ TooMany = TooMany/NumSuccs;
+ for (; PI != PE; ++PI)
+ if (TooMany-- == 0) return false;
+ }
+
+ // If this unconditional branch is a fall-through, be careful about
+ // tail duplicating it. In particular, we don't want to taildup it if the
+ // original block will still be there after taildup is completed: doing so
+ // would eliminate the fall-through, requiring unconditional branches.
+ Function::iterator DestI = Dest;
+ if (&*--DestI == BI->getParent()) {
+ // The uncond branch is a fall-through. Tail duplication of the block is
+ // will eliminate the fall-through-ness and end up cloning the terminator
+ // at the end of the Dest block. Since the original Dest block will
+ // continue to exist, this means that one or the other will not be able to
+ // fall through. One typical example that this helps with is code like:
+ // if (a)
+ // foo();
+ // if (b)
+ // foo();
+ // Cloning the 'if b' block into the end of the first foo block is messy.
+
+ // The messy case is when the fall-through block falls through to other
+ // blocks. This is what we would be preventing if we cloned the block.
+ DestI = Dest;
+ if (++DestI != Dest->getParent()->end()) {
+ BasicBlock *DestSucc = DestI;
+ // If any of Dest's successors are fall-throughs, don't do this xform.
+ for (succ_iterator SI = succ_begin(Dest), SE = succ_end(Dest);
+ SI != SE; ++SI)
+ if (*SI == DestSucc)
+ return false;
+ }
+ }
+
+ // Finally, check that we haven't redirected to this target block earlier;
+ // there are cases where we loop forever if we don't check this (PR 2323).
+ if (!CycleDetector.insert(Dest))
+ return false;
+
+ return true;
+}
+
+/// FindObviousSharedDomOf - We know there is a branch from SrcBlock to
+/// DestBlock, and that SrcBlock is not the only predecessor of DstBlock. If we
+/// can find a predecessor of SrcBlock that is a dominator of both SrcBlock and
+/// DstBlock, return it.
+static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock,
+ BasicBlock *DstBlock) {
+ // SrcBlock must have a single predecessor.
+ pred_iterator PI = pred_begin(SrcBlock), PE = pred_end(SrcBlock);
+ if (PI == PE || ++PI != PE) return 0;
+
+ BasicBlock *SrcPred = *pred_begin(SrcBlock);
+
+ // Look at the predecessors of DstBlock. One of them will be SrcBlock. If
+ // there is only one other pred, get it, otherwise we can't handle it.
+ PI = pred_begin(DstBlock); PE = pred_end(DstBlock);
+ BasicBlock *DstOtherPred = 0;
+ BasicBlock *P = *PI;
+ if (P == SrcBlock) {
+ if (++PI == PE) return 0;
+ DstOtherPred = *PI;
+ if (++PI != PE) return 0;
+ } else {
+ DstOtherPred = P;
+ if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0;
+ }
+
+ // We can handle two situations here: "if then" and "if then else" blocks. An
+ // 'if then' situation is just where DstOtherPred == SrcPred.
+ if (DstOtherPred == SrcPred)
+ return SrcPred;
+
+ // Check to see if we have an "if then else" situation, which means that
+ // DstOtherPred will have a single predecessor and it will be SrcPred.
+ PI = pred_begin(DstOtherPred); PE = pred_end(DstOtherPred);
+ if (PI != PE && *PI == SrcPred) {
+ if (++PI != PE) return 0; // Not a single pred.
+ return SrcPred; // Otherwise, it's an "if then" situation. Return the if.
+ }
+
+ // Otherwise, this is something we can't handle.
+ return 0;
+}
+
+
+/// eliminateUnconditionalBranch - Clone the instructions from the destination
+/// block into the source block, eliminating the specified unconditional branch.
+/// If the destination block defines values used by successors of the dest
+/// block, we may need to insert PHI nodes.
+///
+void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
+ BasicBlock *SourceBlock = Branch->getParent();
+ BasicBlock *DestBlock = Branch->getSuccessor(0);
+ assert(SourceBlock != DestBlock && "Our predicate is broken!");
+
+ DEBUG(dbgs() << "TailDuplication[" << SourceBlock->getParent()->getName()
+ << "]: Eliminating branch: " << *Branch);
+
+ // See if we can avoid duplicating code by moving it up to a dominator of both
+ // blocks.
+ if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) {
+ DEBUG(dbgs() << "Found shared dominator: " << DomBlock->getName() << "\n");
+
+ // If there are non-phi instructions in DestBlock that have no operands
+ // defined in DestBlock, and if the instruction has no side effects, we can
+ // move the instruction to DomBlock instead of duplicating it.
+ BasicBlock::iterator BBI = DestBlock->getFirstNonPHI();
+ while (!isa<TerminatorInst>(BBI)) {
+ Instruction *I = BBI++;
+
+ bool CanHoist = I->isSafeToSpeculativelyExecute() &&
+ !I->mayReadFromMemory();
+ if (CanHoist) {
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op)))
+ if (OpI->getParent() == DestBlock ||
+ (isa<InvokeInst>(OpI) && OpI->getParent() == DomBlock)) {
+ CanHoist = false;
+ break;
+ }
+ if (CanHoist) {
+ // Remove from DestBlock, move right before the term in DomBlock.
+ DestBlock->getInstList().remove(I);
+ DomBlock->getInstList().insert(DomBlock->getTerminator(), I);
+ DEBUG(dbgs() << "Hoisted: " << *I);
+ }
+ }
+ }
+ }
+
+ // Tail duplication can not update SSA properties correctly if the values
+ // defined in the duplicated tail are used outside of the tail itself. For
+ // this reason, we spill all values that are used outside of the tail to the
+ // stack.
+ for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I)
+ if (I->isUsedOutsideOfBlock(DestBlock)) {
+ // We found a use outside of the tail. Create a new stack slot to
+ // break this inter-block usage pattern.
+ DemoteRegToStack(*I);
+ }
+
+ // We are going to have to map operands from the original block B to the new
+ // copy of the block B'. If there are PHI nodes in the DestBlock, these PHI
+ // nodes also define part of this mapping. Loop over these PHI nodes, adding
+ // them to our mapping.
+ //
+ std::map<Value*, Value*> ValueMapping;
+
+ BasicBlock::iterator BI = DestBlock->begin();
+ bool HadPHINodes = isa<PHINode>(BI);
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock);
+
+ // Clone the non-phi instructions of the dest block into the source block,
+ // keeping track of the mapping...
+ //
+ for (; BI != DestBlock->end(); ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ SourceBlock->getInstList().push_back(New);
+ ValueMapping[BI] = New;
+ }
+
+ // Now that we have built the mapping information and cloned all of the
+ // instructions (giving us a new terminator, among other things), walk the new
+ // instructions, rewriting references of old instructions to use new
+ // instructions.
+ //
+ BI = Branch; ++BI; // Get an iterator to the first new instruction
+ for (; BI != SourceBlock->end(); ++BI)
+ for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
+ std::map<Value*, Value*>::const_iterator I =
+ ValueMapping.find(BI->getOperand(i));
+ if (I != ValueMapping.end())
+ BI->setOperand(i, I->second);
+ }
+
+ // Next we check to see if any of the successors of DestBlock had PHI nodes.
+ // If so, we need to add entries to the PHI nodes for SourceBlock now.
+ for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock);
+ SI != SE; ++SI) {
+ BasicBlock *Succ = *SI;
+ for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) {
+ PHINode *PN = cast<PHINode>(PNI);
+ // Ok, we have a PHI node. Figure out what the incoming value was for the
+ // DestBlock.
+ Value *IV = PN->getIncomingValueForBlock(DestBlock);
+
+ // Remap the value if necessary...
+ std::map<Value*, Value*>::const_iterator I = ValueMapping.find(IV);
+ if (I != ValueMapping.end())
+ IV = I->second;
+ PN->addIncoming(IV, SourceBlock);
+ }
+ }
+
+ // Next, remove the old branch instruction, and any PHI node entries that we
+ // had.
+ BI = Branch; ++BI; // Get an iterator to the first new instruction
+ DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes...
+ SourceBlock->getInstList().erase(Branch); // Destroy the uncond branch...
+
+ // Final step: now that we have finished everything up, walk the cloned
+ // instructions one last time, constant propagating and DCE'ing them, because
+ // they may not be needed anymore.
+ //
+ if (HadPHINodes) {
+ while (BI != SourceBlock->end()) {
+ Instruction *Inst = BI++;
+ if (isInstructionTriviallyDead(Inst))
+ Inst->eraseFromParent();
+ else if (Constant *C = ConstantFoldInstruction(Inst)) {
+ Inst->replaceAllUsesWith(C);
+ Inst->eraseFromParent();
+ }
+ }
+ }
+
+ ++NumEliminated; // We just killed a branch!
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
new file mode 100644
index 0000000..3717254
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -0,0 +1,535 @@
+//===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file transforms calls of the current function (self recursion) followed
+// by a return instruction with a branch to the entry of the function, creating
+// a loop. This pass also implements the following extensions to the basic
+// algorithm:
+//
+// 1. Trivial instructions between the call and return do not prevent the
+// transformation from taking place, though currently the analysis cannot
+// support moving any really useful instructions (only dead ones).
+// 2. This pass transforms functions that are prevented from being tail
+// recursive by an associative and commutative expression to use an
+// accumulator variable, thus compiling the typical naive factorial or
+// 'fib' implementation into efficient code.
+// 3. TRE is performed if the function returns void, if the return
+// returns the result returned by the call, or if the function returns a
+// run-time constant on all exits from the function. It is possible, though
+// unlikely, that the return returns something else (like constant 0), and
+// can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in
+// the function return the exact same value.
+// 4. If it can prove that callees do not access their caller stack frame,
+// they are marked as eligible for tail call elimination (by the code
+// generator).
+//
+// There are several improvements that could be made:
+//
+// 1. If the function has any alloca instructions, these instructions will be
+// moved out of the entry block of the function, causing them to be
+// evaluated each time through the tail recursion. Safely keeping allocas
+// in the entry block requires analysis to proves that the tail-called
+// function does not read or write the stack object.
+// 2. Tail recursion is only performed if the call immediately preceeds the
+// return instruction. It's possible that there could be a jump between
+// the call and the return.
+// 3. There can be intervening operations between the call and the return that
+// prevent the TRE from occurring. For example, there could be GEP's and
+// stores to memory that will not be read or written by the call. This
+// requires some substantial analysis (such as with DSA) to prove safe to
+// move ahead of the call, but doing so could allow many more TREs to be
+// performed, for example in TreeAdd/TreeAlloc from the treeadd benchmark.
+// 4. The algorithm we use to detect if callees access their caller stack
+// frames is very primitive.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailcallelim"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumEliminated, "Number of tail calls removed");
+STATISTIC(NumAccumAdded, "Number of accumulators introduced");
+
+namespace {
+ struct TailCallElim : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ TailCallElim() : FunctionPass(ID) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ private:
+ bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool CanMoveAboveCall(Instruction *I, CallInst *CI);
+ Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
+ };
+}
+
+char TailCallElim::ID = 0;
+INITIALIZE_PASS(TailCallElim, "tailcallelim",
+ "Tail Call Elimination", false, false);
+
+// Public interface to the TailCallElimination pass
+FunctionPass *llvm::createTailCallEliminationPass() {
+ return new TailCallElim();
+}
+
+/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
+/// callees of this function. We only do very simple analysis right now, this
+/// could be expanded in the future to use mod/ref information for particular
+/// call sites if desired.
+static bool AllocaMightEscapeToCalls(AllocaInst *AI) {
+ // FIXME: do simple 'address taken' analysis.
+ return true;
+}
+
+/// CheckForEscapingAllocas - Scan the specified basic block for alloca
+/// instructions. If it contains any that might be accessed by calls, return
+/// true.
+static bool CheckForEscapingAllocas(BasicBlock *BB,
+ bool &CannotTCETailMarkedCall) {
+ bool RetVal = false;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ RetVal |= AllocaMightEscapeToCalls(AI);
+
+ // If this alloca is in the body of the function, or if it is a variable
+ // sized allocation, we cannot tail call eliminate calls marked 'tail'
+ // with this mechanism.
+ if (BB != &BB->getParent()->getEntryBlock() ||
+ !isa<ConstantInt>(AI->getArraySize()))
+ CannotTCETailMarkedCall = true;
+ }
+ return RetVal;
+}
+
+bool TailCallElim::runOnFunction(Function &F) {
+ // If this function is a varargs function, we won't be able to PHI the args
+ // right, so don't even try to convert it...
+ if (F.getFunctionType()->isVarArg()) return false;
+
+ BasicBlock *OldEntry = 0;
+ bool TailCallsAreMarkedTail = false;
+ SmallVector<PHINode*, 8> ArgumentPHIs;
+ bool MadeChange = false;
+
+ bool FunctionContainsEscapingAllocas = false;
+
+ // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
+ // marked with the 'tail' attribute, because doing so would cause the stack
+ // size to increase (real TCE would deallocate variable sized allocas, TCE
+ // doesn't).
+ bool CannotTCETailMarkedCall = false;
+
+ // Loop over the function, looking for any returning blocks, and keeping track
+ // of whether this function has any non-trivially used allocas.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (FunctionContainsEscapingAllocas && CannotTCETailMarkedCall)
+ break;
+
+ FunctionContainsEscapingAllocas |=
+ CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
+ }
+
+ /// FIXME: The code generator produces really bad code when an 'escaping
+ /// alloca' is changed from being a static alloca to being a dynamic alloca.
+ /// Until this is resolved, disable this transformation if that would ever
+ /// happen. This bug is PR962.
+ if (FunctionContainsEscapingAllocas)
+ return false;
+
+ // Second pass, change any tail calls to loops.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator()))
+ MadeChange |= ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs,CannotTCETailMarkedCall);
+
+ // If we eliminated any tail recursions, it's possible that we inserted some
+ // silly PHI nodes which just merge an initial value (the incoming operand)
+ // with themselves. Check to see if we did and clean up our mess if so. This
+ // occurs when a function passes an argument straight through to its tail
+ // call.
+ if (!ArgumentPHIs.empty()) {
+ for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
+ PHINode *PN = ArgumentPHIs[i];
+
+ // If the PHI Node is a dynamic constant, replace it with the value it is.
+ if (Value *PNV = PN->hasConstantValue()) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
+ }
+ }
+ }
+
+ // Finally, if this function contains no non-escaping allocas, mark all calls
+ // in the function as eligible for tail calls (there is no stack memory for
+ // them to access).
+ if (!FunctionContainsEscapingAllocas)
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ CI->setTailCall();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+
+/// CanMoveAboveCall - Return true if it is safe to move the specified
+/// instruction from after the call to before the call, assuming that all
+/// instructions between the call and this instruction are movable.
+///
+bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
+ // FIXME: We can move load/store/call/free instructions above the call if the
+ // call does not mod/ref the memory location being processed.
+ if (I->mayHaveSideEffects()) // This also handles volatile loads.
+ return false;
+
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ // Loads may always be moved above calls without side effects.
+ if (CI->mayHaveSideEffects()) {
+ // Non-volatile loads may be moved above a call with side effects if it
+ // does not write to memory and the load provably won't trap.
+ // FIXME: Writes to memory only matter if they may alias the pointer
+ // being loaded from.
+ if (CI->mayWriteToMemory() ||
+ !isSafeToLoadUnconditionally(L->getPointerOperand(), L,
+ L->getAlignment()))
+ return false;
+ }
+ }
+
+ // Otherwise, if this is a side-effect free instruction, check to make sure
+ // that it does not use the return value of the call. If it doesn't use the
+ // return value of the call, it must only use things that are defined before
+ // the call, or movable instructions between the call and the instruction
+ // itself.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i) == CI)
+ return false;
+ return true;
+}
+
+// isDynamicConstant - Return true if the specified value is the same when the
+// return would exit as it was when the initial iteration of the recursive
+// function was executed.
+//
+// We currently handle static constants and arguments that are not modified as
+// part of the recursion.
+//
+static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
+ if (isa<Constant>(V)) return true; // Static constants are always dyn consts
+
+ // Check to see if this is an immutable argument, if so, the value
+ // will be available to initialize the accumulator.
+ if (Argument *Arg = dyn_cast<Argument>(V)) {
+ // Figure out which argument number this is...
+ unsigned ArgNo = 0;
+ Function *F = CI->getParent()->getParent();
+ for (Function::arg_iterator AI = F->arg_begin(); &*AI != Arg; ++AI)
+ ++ArgNo;
+
+ // If we are passing this argument into call as the corresponding
+ // argument operand, then the argument is dynamically constant.
+ // Otherwise, we cannot transform this function safely.
+ if (CI->getArgOperand(ArgNo) == Arg)
+ return true;
+ }
+
+ // Switch cases are always constant integers. If the value is being switched
+ // on and the return is only reachable from one of its cases, it's
+ // effectively constant.
+ if (BasicBlock *UniquePred = RI->getParent()->getUniquePredecessor())
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(UniquePred->getTerminator()))
+ if (SI->getCondition() == V)
+ return SI->getDefaultDest() != RI->getParent();
+
+ // Not a constant or immutable argument, we can't safely transform.
+ return false;
+}
+
+// getCommonReturnValue - Check to see if the function containing the specified
+// tail call consistently returns the same runtime-constant value at all exit
+// points except for IgnoreRI. If so, return the returned value.
+//
+static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
+ Function *F = CI->getParent()->getParent();
+ Value *ReturnedValue = 0;
+
+ for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
+ if (RI == 0 || RI == IgnoreRI) continue;
+
+ // We can only perform this transformation if the value returned is
+ // evaluatable at the start of the initial invocation of the function,
+ // instead of at the end of the evaluation.
+ //
+ Value *RetOp = RI->getOperand(0);
+ if (!isDynamicConstant(RetOp, CI, RI))
+ return 0;
+
+ if (ReturnedValue && RetOp != ReturnedValue)
+ return 0; // Cannot transform if differing values are returned.
+ ReturnedValue = RetOp;
+ }
+ return ReturnedValue;
+}
+
+/// CanTransformAccumulatorRecursion - If the specified instruction can be
+/// transformed using accumulator recursion elimination, return the constant
+/// which is the start of the accumulator value. Otherwise return null.
+///
+Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
+ CallInst *CI) {
+ if (!I->isAssociative() || !I->isCommutative()) return 0;
+ assert(I->getNumOperands() == 2 &&
+ "Associative/commutative operations should have 2 args!");
+
+ // Exactly one operand should be the result of the call instruction.
+ if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
+ (I->getOperand(0) != CI && I->getOperand(1) != CI))
+ return 0;
+
+ // The only user of this instruction we allow is a single return instruction.
+ if (!I->hasOneUse() || !isa<ReturnInst>(I->use_back()))
+ return 0;
+
+ // Ok, now we have to check all of the other return instructions in this
+ // function. If they return non-constants or differing values, then we cannot
+ // transform the function safely.
+ return getCommonReturnValue(cast<ReturnInst>(I->use_back()), CI);
+}
+
+bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ BasicBlock *BB = Ret->getParent();
+ Function *F = BB->getParent();
+
+ if (&BB->front() == Ret) // Make sure there is something before the ret...
+ return false;
+
+ // Scan backwards from the return, checking to see if there is a tail call in
+ // this block. If so, set CI to it.
+ CallInst *CI;
+ BasicBlock::iterator BBI = Ret;
+ while (1) {
+ CI = dyn_cast<CallInst>(BBI);
+ if (CI && CI->getCalledFunction() == F)
+ break;
+
+ if (BBI == BB->begin())
+ return false; // Didn't find a potential tail call.
+ --BBI;
+ }
+
+ // If this call is marked as a tail call, and if there are dynamic allocas in
+ // the function, we cannot perform this optimization.
+ if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
+ return false;
+
+ // As a special case, detect code like this:
+ // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
+ // and disable this xform in this case, because the code generator will
+ // lower the call to fabs into inline code.
+ if (BB == &F->getEntryBlock() &&
+ &BB->front() == CI && &*++BB->begin() == Ret &&
+ callIsSmall(F)) {
+ // A single-block function with just a call and a return. Check that
+ // the arguments match.
+ CallSite::arg_iterator I = CallSite(CI).arg_begin(),
+ E = CallSite(CI).arg_end();
+ Function::arg_iterator FI = F->arg_begin(),
+ FE = F->arg_end();
+ for (; I != E && FI != FE; ++I, ++FI)
+ if (*I != &*FI) break;
+ if (I == E && FI == FE)
+ return false;
+ }
+
+ // If we are introducing accumulator recursion to eliminate operations after
+ // the call instruction that are both associative and commutative, the initial
+ // value for the accumulator is placed in this variable. If this value is set
+ // then we actually perform accumulator recursion elimination instead of
+ // simple tail recursion elimination. If the operation is an LLVM instruction
+ // (eg: "add") then it is recorded in AccumulatorRecursionInstr. If not, then
+ // we are handling the case when the return instruction returns a constant C
+ // which is different to the constant returned by other return instructions
+ // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a
+ // special case of accumulator recursion, the operation being "return C".
+ Value *AccumulatorRecursionEliminationInitVal = 0;
+ Instruction *AccumulatorRecursionInstr = 0;
+
+ // Ok, we found a potential tail call. We can currently only transform the
+ // tail call if all of the instructions between the call and the return are
+ // movable to above the call itself, leaving the call next to the return.
+ // Check that this is the case now.
+ for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) {
+ if (CanMoveAboveCall(BBI, CI)) continue;
+
+ // If we can't move the instruction above the call, it might be because it
+ // is an associative and commutative operation that could be tranformed
+ // using accumulator recursion elimination. Check to see if this is the
+ // case, and if so, remember the initial accumulator value for later.
+ if ((AccumulatorRecursionEliminationInitVal =
+ CanTransformAccumulatorRecursion(BBI, CI))) {
+ // Yes, this is accumulator recursion. Remember which instruction
+ // accumulates.
+ AccumulatorRecursionInstr = BBI;
+ } else {
+ return false; // Otherwise, we cannot eliminate the tail recursion!
+ }
+ }
+
+ // We can only transform call/return pairs that either ignore the return value
+ // of the call and return void, ignore the value of the call and return a
+ // constant, return the value returned by the tail call, or that are being
+ // accumulator recursion variable eliminated.
+ if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
+ !isa<UndefValue>(Ret->getReturnValue()) &&
+ AccumulatorRecursionEliminationInitVal == 0 &&
+ !getCommonReturnValue(0, CI)) {
+ // One case remains that we are able to handle: the current return
+ // instruction returns a constant, and all other return instructions
+ // return a different constant.
+ if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret))
+ return false; // Current return instruction does not return a constant.
+ // Check that all other return instructions return a common constant. If
+ // so, record it in AccumulatorRecursionEliminationInitVal.
+ AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI);
+ if (!AccumulatorRecursionEliminationInitVal)
+ return false;
+ }
+
+ // OK! We can transform this tail call. If this is the first one found,
+ // create the new entry block, allowing us to branch back to the old entry.
+ if (OldEntry == 0) {
+ OldEntry = &F->getEntryBlock();
+ BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
+ NewEntry->takeName(OldEntry);
+ OldEntry->setName("tailrecurse");
+ BranchInst::Create(OldEntry, NewEntry);
+
+ // If this tail call is marked 'tail' and if there are any allocas in the
+ // entry block, move them up to the new entry block.
+ TailCallsAreMarkedTail = CI->isTailCall();
+ if (TailCallsAreMarkedTail)
+ // Move all fixed sized allocas from OldEntry to NewEntry.
+ for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(),
+ NEBI = NewEntry->begin(); OEBI != E; )
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
+ if (isa<ConstantInt>(AI->getArraySize()))
+ AI->moveBefore(NEBI);
+
+ // Now that we have created a new block, which jumps to the entry
+ // block, insert a PHI node for each argument of the function.
+ // For now, we initialize each PHI to only have the real arguments
+ // which are passed in.
+ Instruction *InsertPos = OldEntry->begin();
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I) {
+ PHINode *PN = PHINode::Create(I->getType(),
+ I->getName() + ".tr", InsertPos);
+ I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
+ PN->addIncoming(I, NewEntry);
+ ArgumentPHIs.push_back(PN);
+ }
+ }
+
+ // If this function has self recursive calls in the tail position where some
+ // are marked tail and some are not, only transform one flavor or another. We
+ // have to choose whether we move allocas in the entry block to the new entry
+ // block or not, so we can't make a good choice for both. NOTE: We could do
+ // slightly better here in the case that the function has no entry block
+ // allocas.
+ if (TailCallsAreMarkedTail && !CI->isTailCall())
+ return false;
+
+ // Ok, now that we know we have a pseudo-entry block WITH all of the
+ // required PHI nodes, add entries into the PHI node for the actual
+ // parameters passed into the tail-recursive call.
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);
+
+ // If we are introducing an accumulator variable to eliminate the recursion,
+ // do so now. Note that we _know_ that no subsequent tail recursion
+ // eliminations will happen on this function because of the way the
+ // accumulator recursion predicate is set up.
+ //
+ if (AccumulatorRecursionEliminationInitVal) {
+ Instruction *AccRecInstr = AccumulatorRecursionInstr;
+ // Start by inserting a new PHI node for the accumulator.
+ PHINode *AccPN =
+ PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
+ "accumulator.tr", OldEntry->begin());
+
+ // Loop over all of the predecessors of the tail recursion block. For the
+ // real entry into the function we seed the PHI with the initial value,
+ // computed earlier. For any other existing branches to this block (due to
+ // other tail recursions eliminated) the accumulator is not modified.
+ // Because we haven't added the branch in the current block to OldEntry yet,
+ // it will not show up as a predecessor.
+ for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (P == &F->getEntryBlock())
+ AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
+ else
+ AccPN->addIncoming(AccPN, P);
+ }
+
+ if (AccRecInstr) {
+ // Add an incoming argument for the current block, which is computed by
+ // our associative and commutative accumulator instruction.
+ AccPN->addIncoming(AccRecInstr, BB);
+
+ // Next, rewrite the accumulator recursion instruction so that it does not
+ // use the result of the call anymore, instead, use the PHI node we just
+ // inserted.
+ AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
+ } else {
+ // Add an incoming argument for the current block, which is just the
+ // constant returned by the current return instruction.
+ AccPN->addIncoming(Ret->getReturnValue(), BB);
+ }
+
+ // Finally, rewrite any return instructions in the program to return the PHI
+ // node instead of the "initval" that they do currently. This loop will
+ // actually rewrite the return value we are destroying, but that's ok.
+ for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
+ RI->setOperand(0, AccPN);
+ ++NumAccumAdded;
+ }
+
+ // Now that all of the PHI nodes are in place, remove the call and
+ // ret instructions, replacing them with an unconditional branch.
+ BranchInst::Create(OldEntry, Ret);
+ BB->getInstList().erase(Ret); // Remove return.
+ BB->getInstList().erase(CI); // Remove call.
+ ++NumEliminated;
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
new file mode 100644
index 0000000..4d64c85
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -0,0 +1,598 @@
+//===- AddrModeMatcher.cpp - Addressing mode matching facility --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target addressing mode matcher class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instruction.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+void ExtAddrMode::print(raw_ostream &OS) const {
+ bool NeedPlus = false;
+ OS << "[";
+ if (BaseGV) {
+ OS << (NeedPlus ? " + " : "")
+ << "GV:";
+ WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+
+ if (BaseOffs)
+ OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
+
+ if (BaseReg) {
+ OS << (NeedPlus ? " + " : "")
+ << "Base:";
+ WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+ if (Scale) {
+ OS << (NeedPlus ? " + " : "")
+ << Scale << "*";
+ WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+
+ OS << ']';
+}
+
+void ExtAddrMode::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+
+/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
+/// Return true and update AddrMode if this addr mode is legal for the target,
+/// false if not.
+bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
+ unsigned Depth) {
+ // If Scale is 1, then this is the same as adding ScaleReg to the addressing
+ // mode. Just process that directly.
+ if (Scale == 1)
+ return MatchAddr(ScaleReg, Depth);
+
+ // If the scale is 0, it takes nothing to add this.
+ if (Scale == 0)
+ return true;
+
+ // If we already have a scale of this value, we can add to it, otherwise, we
+ // need an available scale field.
+ if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
+ return false;
+
+ ExtAddrMode TestAddrMode = AddrMode;
+
+ // Add scale to turn X*4+X*3 -> X*7. This could also do things like
+ // [A+B + A*7] -> [B+A*8].
+ TestAddrMode.Scale += Scale;
+ TestAddrMode.ScaledReg = ScaleReg;
+
+ // If the new address isn't legal, bail out.
+ if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
+ return false;
+
+ // It was legal, so commit it.
+ AddrMode = TestAddrMode;
+
+ // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
+ // to see if ScaleReg is actually X+C. If so, we can turn this into adding
+ // X*Scale + C*Scale to addr mode.
+ ConstantInt *CI = 0; Value *AddLHS = 0;
+ if (isa<Instruction>(ScaleReg) && // not a constant expr.
+ match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+ TestAddrMode.ScaledReg = AddLHS;
+ TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
+
+ // If this addressing mode is legal, commit it and remember that we folded
+ // this instruction.
+ if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
+ AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
+ AddrMode = TestAddrMode;
+ return true;
+ }
+ }
+
+ // Otherwise, not (x+c)*scale, just return what we have.
+ return true;
+}
+
+/// MightBeFoldableInst - This is a little filter, which returns true if an
+/// addressing computation involving I might be folded into a load/store
+/// accessing it. This doesn't need to be perfect, but needs to accept at least
+/// the set of instructions that MatchOperationAddr can.
+static bool MightBeFoldableInst(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ // Don't touch identity bitcasts.
+ if (I->getType() == I->getOperand(0)->getType())
+ return false;
+ return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return true;
+ case Instruction::IntToPtr:
+ // We know the input is intptr_t, so this is foldable.
+ return true;
+ case Instruction::Add:
+ return true;
+ case Instruction::Mul:
+ case Instruction::Shl:
+ // Can only handle X*C and X << C.
+ return isa<ConstantInt>(I->getOperand(1));
+ case Instruction::GetElementPtr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+/// MatchOperationAddr - Given an instruction or constant expr, see if we can
+/// fold the operation into the addressing mode. If so, update the addressing
+/// mode and return true, otherwise return false without modifying AddrMode.
+bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
+ unsigned Depth) {
+ // Avoid exponential behavior on extremely deep expression trees.
+ if (Depth >= 5) return false;
+
+ switch (Opcode) {
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ case Instruction::IntToPtr:
+ // This inttoptr is a no-op if the integer type is pointer sized.
+ if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
+ TLI.getPointerTy())
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::BitCast:
+ // BitCast is always a noop, and we can handle it as long as it is
+ // int->int or pointer->pointer (we don't want int<->fp or something).
+ if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
+ AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
+ // Don't touch identity bitcasts. These were probably put here by LSR,
+ // and we don't want to mess around with them. Assume it knows what it
+ // is doing.
+ AddrInst->getOperand(0)->getType() != AddrInst->getType())
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::Add: {
+ // Check to see if we can merge in the RHS then the LHS. If so, we win.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+ if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
+ MatchAddr(AddrInst->getOperand(0), Depth+1))
+ return true;
+
+ // Restore the old addr mode info.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+
+ // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
+ if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
+ MatchAddr(AddrInst->getOperand(1), Depth+1))
+ return true;
+
+ // Otherwise we definitely can't merge the ADD in.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ break;
+ }
+ //case Instruction::Or:
+ // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
+ //break;
+ case Instruction::Mul:
+ case Instruction::Shl: {
+ // Can only handle X*C and X << C.
+ ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
+ if (!RHS) return false;
+ int64_t Scale = RHS->getSExtValue();
+ if (Opcode == Instruction::Shl)
+ Scale = 1LL << Scale;
+
+ return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+ }
+ case Instruction::GetElementPtr: {
+ // Scan the GEP. We check it if it contains constant offsets and at most
+ // one variable offset.
+ int VariableOperand = -1;
+ unsigned VariableScale = 0;
+
+ int64_t ConstantOffset = 0;
+ const TargetData *TD = TLI.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(AddrInst);
+ for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ unsigned Idx =
+ cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
+ ConstantOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
+ ConstantOffset += CI->getSExtValue()*TypeSize;
+ } else if (TypeSize) { // Scales of zero don't do anything.
+ // We only allow one variable index at the moment.
+ if (VariableOperand != -1)
+ return false;
+
+ // Remember the variable index.
+ VariableOperand = i;
+ VariableScale = TypeSize;
+ }
+ }
+ }
+
+ // A common case is for the GEP to only do a constant offset. In this case,
+ // just add it to the disp field and check validity.
+ if (VariableOperand == -1) {
+ AddrMode.BaseOffs += ConstantOffset;
+ if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
+ // Check to see if we can fold the base pointer in too.
+ if (MatchAddr(AddrInst->getOperand(0), Depth+1))
+ return true;
+ }
+ AddrMode.BaseOffs -= ConstantOffset;
+ return false;
+ }
+
+ // Save the valid addressing mode in case we can't match.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // See if the scale and offset amount is valid for this target.
+ AddrMode.BaseOffs += ConstantOffset;
+
+ // Match the base operand of the GEP.
+ if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
+ // If it couldn't be matched, just stuff the value in a register.
+ if (AddrMode.HasBaseReg) {
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ }
+
+ // Match the remaining variable portion of the GEP.
+ if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+ Depth)) {
+ // If it couldn't be matched, try stuffing the base into a register
+ // instead of matching it, and retrying the match of the scale.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ if (AddrMode.HasBaseReg)
+ return false;
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ AddrMode.BaseOffs += ConstantOffset;
+ if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
+ VariableScale, Depth)) {
+ // If even that didn't work, bail.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ }
+
+ return true;
+ }
+ }
+ return false;
+}
+
+/// MatchAddr - If we can, try to add the value of 'Addr' into the current
+/// addressing mode. If Addr can't be added to AddrMode this returns false and
+/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
+/// or intptr_t for the target.
+///
+bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
+ // Fold in immediates if legal for the target.
+ AddrMode.BaseOffs += CI->getSExtValue();
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseOffs -= CI->getSExtValue();
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
+ // If this is a global variable, try to fold it into the addressing mode.
+ if (AddrMode.BaseGV == 0) {
+ AddrMode.BaseGV = GV;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseGV = 0;
+ }
+ } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // Check to see if it is possible to fold this operation.
+ if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
+ // Okay, it's possible to fold this. Check to see if it is actually
+ // *profitable* to do so. We use a simple cost model to avoid increasing
+ // register pressure too much.
+ if (I->hasOneUse() ||
+ IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+ AddrModeInsts.push_back(I);
+ return true;
+ }
+
+ // It isn't profitable to do this, roll back.
+ //cerr << "NOT FOLDING: " << *I;
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+ if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
+ return true;
+ } else if (isa<ConstantPointerNull>(Addr)) {
+ // Null pointer gets folded without affecting the addressing mode.
+ return true;
+ }
+
+ // Worse case, the target should support [reg] addressing modes. :)
+ if (!AddrMode.HasBaseReg) {
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = Addr;
+ // Still check for legality in case the target supports [imm] but not [i+r].
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.HasBaseReg = false;
+ AddrMode.BaseReg = 0;
+ }
+
+ // If the base register is already taken, see if we can do [r+r].
+ if (AddrMode.Scale == 0) {
+ AddrMode.Scale = 1;
+ AddrMode.ScaledReg = Addr;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.Scale = 0;
+ AddrMode.ScaledReg = 0;
+ }
+ // Couldn't match.
+ return false;
+}
+
+
+/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
+/// inline asm call are due to memory operands. If so, return true, otherwise
+/// return false.
+static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+ const TargetLowering &TLI) {
+ std::vector<InlineAsm::ConstraintInfo>
+ Constraints = IA->ParseConstraints();
+
+ unsigned ArgNo = 0; // The argument of the CallInst.
+ for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ if (OpInfo.isIndirect)
+ OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+ // If this asm operand is our Value*, and if it isn't an indirect memory
+ // operand, we can't fold it!
+ if (OpInfo.CallOperandVal == OpVal &&
+ (OpInfo.ConstraintType != TargetLowering::C_Memory ||
+ !OpInfo.isIndirect))
+ return false;
+ }
+
+ return true;
+}
+
+
+/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
+/// memory use. If we find an obviously non-foldable instruction, return true.
+/// Add the ultimately found memory instructions to MemoryUses.
+static bool FindAllMemoryUses(Instruction *I,
+ SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
+ SmallPtrSet<Instruction*, 16> &ConsideredInsts,
+ const TargetLowering &TLI) {
+ // If we already considered this instruction, we're done.
+ if (!ConsideredInsts.insert(I))
+ return false;
+
+ // If this is an obviously unfoldable instruction, bail out.
+ if (!MightBeFoldableInst(I))
+ return true;
+
+ // Loop over all the uses, recursively processing them.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ unsigned opNo = UI.getOperandNo();
+ if (opNo == 0) return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(SI, opNo));
+ continue;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
+ if (!IA) return true;
+
+ // If this is a memory operand, we're cool, otherwise bail out.
+ if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
+ return true;
+ continue;
+ }
+
+ if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
+ TLI))
+ return true;
+ }
+
+ return false;
+}
+
+
+/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at
+/// the use site that we're folding it into. If so, there is no cost to
+/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
+/// that we know are live at the instruction already.
+bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+ Value *KnownLive2) {
+ // If Val is either of the known-live values, we know it is live!
+ if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
+ return true;
+
+ // All values other than instructions and arguments (e.g. constants) are live.
+ if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
+
+ // If Val is a constant sized alloca in the entry block, it is live, this is
+ // true because it is just a reference to the stack/frame pointer, which is
+ // live for the whole function.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
+ if (AI->isStaticAlloca())
+ return true;
+
+ // Check to see if this value is already used in the memory instruction's
+ // block. If so, it's already live into the block at the very least, so we
+ // can reasonably fold it.
+ BasicBlock *MemBB = MemoryInst->getParent();
+ for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
+ UI != E; ++UI)
+ // We know that uses of arguments and instructions have to be instructions.
+ if (cast<Instruction>(*UI)->getParent() == MemBB)
+ return true;
+
+ return false;
+}
+
+
+
+/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
+/// mode of the machine to fold the specified instruction into a load or store
+/// that ultimately uses it. However, the specified instruction has multiple
+/// uses. Given this, it may actually increase register pressure to fold it
+/// into the load. For example, consider this code:
+///
+/// X = ...
+/// Y = X+1
+/// use(Y) -> nonload/store
+/// Z = Y+1
+/// load Z
+///
+/// In this case, Y has multiple uses, and can be folded into the load of Z
+/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
+/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
+/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
+/// number of computations either.
+///
+/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
+/// X was live across 'load Z' for other reasons, we actually *would* want to
+/// fold the addressing mode in the Z case. This would make Y die earlier.
+bool AddressingModeMatcher::
+IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+ ExtAddrMode &AMAfter) {
+ if (IgnoreProfitability) return true;
+
+ // AMBefore is the addressing mode before this instruction was folded into it,
+ // and AMAfter is the addressing mode after the instruction was folded. Get
+ // the set of registers referenced by AMAfter and subtract out those
+ // referenced by AMBefore: this is the set of values which folding in this
+ // address extends the lifetime of.
+ //
+ // Note that there are only two potential values being referenced here,
+ // BaseReg and ScaleReg (global addresses are always available, as are any
+ // folded immediates).
+ Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
+
+ // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
+ // lifetime wasn't extended by adding this instruction.
+ if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ BaseReg = 0;
+ if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ ScaledReg = 0;
+
+ // If folding this instruction (and it's subexprs) didn't extend any live
+ // ranges, we're ok with it.
+ if (BaseReg == 0 && ScaledReg == 0)
+ return true;
+
+ // If all uses of this instruction are ultimately load/store/inlineasm's,
+ // check to see if their addressing modes will include this instruction. If
+ // so, we can fold it into all uses, so it doesn't matter if it has multiple
+ // uses.
+ SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+ SmallPtrSet<Instruction*, 16> ConsideredInsts;
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+ return false; // Has a non-memory, non-foldable use!
+
+ // Now that we know that all uses of this instruction are part of a chain of
+ // computation involving only operations that could theoretically be folded
+ // into a memory use, loop over each of these uses and see if they could
+ // *actually* fold the instruction.
+ SmallVector<Instruction*, 32> MatchedAddrModeInsts;
+ for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
+ Instruction *User = MemoryUses[i].first;
+ unsigned OpNo = MemoryUses[i].second;
+
+ // Get the access type of this use. If the use isn't a pointer, we don't
+ // know what it accesses.
+ Value *Address = User->getOperand(OpNo);
+ if (!Address->getType()->isPointerTy())
+ return false;
+ const Type *AddressAccessTy =
+ cast<PointerType>(Address->getType())->getElementType();
+
+ // Do a match against the root of this address, ignoring profitability. This
+ // will tell us if the addressing mode for the memory operation will
+ // *actually* cover the shared instruction.
+ ExtAddrMode Result;
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
+ MemoryInst, Result);
+ Matcher.IgnoreProfitability = true;
+ bool Success = Matcher.MatchAddr(Address, 0);
+ Success = Success; assert(Success && "Couldn't select *anything*?");
+
+ // If the match didn't cover I, then it won't be shared by it.
+ if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
+ I) == MatchedAddrModeInsts.end())
+ return false;
+
+ MatchedAddrModeInsts.clear();
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
new file mode 100644
index 0000000..093083a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -0,0 +1,551 @@
+//===-- BasicBlockUtils.cpp - BasicBlock Utilities -------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform manipulations on basic blocks, and
+// instructions contained within basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Constant.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ValueHandle.h"
+#include <algorithm>
+using namespace llvm;
+
+/// DeleteDeadBlock - Delete the specified block, which must have no
+/// predecessors.
+void llvm::DeleteDeadBlock(BasicBlock *BB) {
+ assert((pred_begin(BB) == pred_end(BB) ||
+ // Can delete self loop.
+ BB->getSinglePredecessor() == BB) && "Block is not dead!");
+ TerminatorInst *BBTerm = BB->getTerminator();
+
+ // Loop through all of our successors and make sure they know that one
+ // of their predecessors is going away.
+ for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i)
+ BBTerm->getSuccessor(i)->removePredecessor(BB);
+
+ // Zap all the instructions in the block.
+ while (!BB->empty()) {
+ Instruction &I = BB->back();
+ // If this instruction is used, replace uses with an arbitrary value.
+ // Because control flow can't get here, we don't care what we replace the
+ // value with. Note that since this block is unreachable, and all values
+ // contained within it must dominate their uses, that all uses will
+ // eventually be removed (they are themselves dead).
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ BB->getInstList().pop_back();
+ }
+
+ // Zap the block!
+ BB->eraseFromParent();
+}
+
+/// FoldSingleEntryPHINodes - We know that BB has one predecessor. If there are
+/// any single-entry PHI nodes in it, fold them away. This handles the case
+/// when all entries to the PHI nodes in a block are guaranteed equal, such as
+/// when the block has exactly one predecessor.
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB) {
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->eraseFromParent();
+ }
+}
+
+
+/// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
+/// is dead. Also recursively delete any operands that become dead as
+/// a result. This includes tracing the def-use list from the PHI to see if
+/// it is ultimately unused or if it reaches an unused cycle.
+bool llvm::DeleteDeadPHIs(BasicBlock *BB) {
+ // Recursively deleting a PHI may cause multiple PHIs to be deleted
+ // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
+ SmallVector<WeakVH, 8> PHIs;
+ for (BasicBlock::iterator I = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ bool Changed = false;
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
+ Changed |= RecursivelyDeleteDeadPHINode(PN);
+
+ return Changed;
+}
+
+/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
+/// if possible. The return value indicates success or failure.
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
+ // Don't merge away blocks who have their address taken.
+ if (BB->hasAddressTaken()) return false;
+
+ // Can't merge if there are multiple predecessors, or no predecessors.
+ BasicBlock *PredBB = BB->getUniquePredecessor();
+ if (!PredBB) return false;
+
+ // Don't break self-loops.
+ if (PredBB == BB) return false;
+ // Don't break invokes.
+ if (isa<InvokeInst>(PredBB->getTerminator())) return false;
+
+ succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
+ BasicBlock* OnlySucc = BB;
+ for (; SI != SE; ++SI)
+ if (*SI != OnlySucc) {
+ OnlySucc = 0; // There are multiple distinct successors!
+ break;
+ }
+
+ // Can't merge if there are multiple successors.
+ if (!OnlySucc) return false;
+
+ // Can't merge if there is PHI loop.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == PN)
+ return false;
+ } else
+ break;
+ }
+
+ // Begin by getting rid of unneeded PHIs.
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ BB->getInstList().pop_front(); // Delete the phi node...
+ }
+
+ // Delete the unconditional branch from the predecessor...
+ PredBB->getInstList().pop_back();
+
+ // Move all definitions in the successor to the predecessor...
+ PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+
+ // Make all PHI nodes that referred to BB now refer to Pred as their
+ // source...
+ BB->replaceAllUsesWith(PredBB);
+
+ // Inherit predecessors name if it exists.
+ if (!PredBB->hasName())
+ PredBB->takeName(BB);
+
+ // Finally, erase the old block and update dominator info.
+ if (P) {
+ if (DominatorTree* DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ DomTreeNode* DTN = DT->getNode(BB);
+ DomTreeNode* PredDTN = DT->getNode(PredBB);
+
+ if (DTN) {
+ SmallPtrSet<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
+ for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = Children.begin(),
+ DE = Children.end(); DI != DE; ++DI)
+ DT->changeImmediateDominator(*DI, PredDTN);
+
+ DT->eraseNode(BB);
+ }
+ }
+ }
+
+ BB->eraseFromParent();
+
+
+ return true;
+}
+
+/// ReplaceInstWithValue - Replace all uses of an instruction (specified by BI)
+/// with a value, then remove and delete the original instruction.
+///
+void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
+ BasicBlock::iterator &BI, Value *V) {
+ Instruction &I = *BI;
+ // Replaces all of the uses of the instruction with uses of the value
+ I.replaceAllUsesWith(V);
+
+ // Make sure to propagate a name if there is one already.
+ if (I.hasName() && !V->hasName())
+ V->takeName(&I);
+
+ // Delete the unnecessary instruction now...
+ BI = BIL.erase(BI);
+}
+
+
+/// ReplaceInstWithInst - Replace the instruction specified by BI with the
+/// instruction specified by I. The original instruction is deleted and BI is
+/// updated to point to the new instruction.
+///
+void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
+ BasicBlock::iterator &BI, Instruction *I) {
+ assert(I->getParent() == 0 &&
+ "ReplaceInstWithInst: Instruction already inserted into basic block!");
+
+ // Insert the new instruction into the basic block...
+ BasicBlock::iterator New = BIL.insert(BI, I);
+
+ // Replace all uses of the old instruction, and delete it.
+ ReplaceInstWithValue(BIL, BI, I);
+
+ // Move BI back to point to the newly inserted instruction
+ BI = New;
+}
+
+/// ReplaceInstWithInst - Replace the instruction specified by From with the
+/// instruction specified by To.
+///
+void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
+ BasicBlock::iterator BI(From);
+ ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
+}
+
+/// RemoveSuccessor - Change the specified terminator instruction such that its
+/// successor SuccNum no longer exists. Because this reduces the outgoing
+/// degree of the current basic block, the actual terminator instruction itself
+/// may have to be changed. In the case where the last successor of the block
+/// is deleted, a return instruction is inserted in its place which can cause a
+/// surprising change in program behavior if it is not expected.
+///
+void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
+ assert(SuccNum < TI->getNumSuccessors() &&
+ "Trying to remove a nonexistant successor!");
+
+ // If our old successor block contains any PHI nodes, remove the entry in the
+ // PHI nodes that comes from this branch...
+ //
+ BasicBlock *BB = TI->getParent();
+ TI->getSuccessor(SuccNum)->removePredecessor(BB);
+
+ TerminatorInst *NewTI = 0;
+ switch (TI->getOpcode()) {
+ case Instruction::Br:
+ // If this is a conditional branch... convert to unconditional branch.
+ if (TI->getNumSuccessors() == 2) {
+ cast<BranchInst>(TI)->setUnconditionalDest(TI->getSuccessor(1-SuccNum));
+ } else { // Otherwise convert to a return instruction...
+ Value *RetVal = 0;
+
+ // Create a value to return... if the function doesn't return null...
+ if (!BB->getParent()->getReturnType()->isVoidTy())
+ RetVal = Constant::getNullValue(BB->getParent()->getReturnType());
+
+ // Create the return...
+ NewTI = ReturnInst::Create(TI->getContext(), RetVal);
+ }
+ break;
+
+ case Instruction::Invoke: // Should convert to call
+ case Instruction::Switch: // Should remove entry
+ default:
+ case Instruction::Ret: // Cannot happen, has no successors!
+ llvm_unreachable("Unhandled terminator inst type in RemoveSuccessor!");
+ }
+
+ if (NewTI) // If it's a different instruction, replace.
+ ReplaceInstWithInst(TI, NewTI);
+}
+
+/// GetSuccessorNumber - Search for the specified successor of basic block BB
+/// and return its position in the terminator instruction's list of
+/// successors. It is an error to call this with a block that is not a
+/// successor.
+unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
+ TerminatorInst *Term = BB->getTerminator();
+#ifndef NDEBUG
+ unsigned e = Term->getNumSuccessors();
+#endif
+ for (unsigned i = 0; ; ++i) {
+ assert(i != e && "Didn't find edge?");
+ if (Term->getSuccessor(i) == Succ)
+ return i;
+ }
+ return 0;
+}
+
+/// SplitEdge - Split the edge connecting specified block. Pass P must
+/// not be NULL.
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
+ unsigned SuccNum = GetSuccessorNumber(BB, Succ);
+
+ // If this is a critical edge, let SplitCriticalEdge do it.
+ TerminatorInst *LatchTerm = BB->getTerminator();
+ if (SplitCriticalEdge(LatchTerm, SuccNum, P))
+ return LatchTerm->getSuccessor(SuccNum);
+
+ // If the edge isn't critical, then BB has a single successor or Succ has a
+ // single pred. Split the block.
+ BasicBlock::iterator SplitPoint;
+ if (BasicBlock *SP = Succ->getSinglePredecessor()) {
+ // If the successor only has a single pred, split the top of the successor
+ // block.
+ assert(SP == BB && "CFG broken");
+ SP = NULL;
+ return SplitBlock(Succ, Succ->begin(), P);
+ } else {
+ // Otherwise, if BB has a single successor, split it at the bottom of the
+ // block.
+ assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+ "Should have a single succ!");
+ return SplitBlock(BB, BB->getTerminator(), P);
+ }
+}
+
+/// SplitBlock - Split the specified block at the specified instruction - every
+/// thing before SplitPt stays in Old and everything starting with SplitPt moves
+/// to a new block. The two blocks are joined by an unconditional branch and
+/// the loop info is updated.
+///
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
+ BasicBlock::iterator SplitIt = SplitPt;
+ while (isa<PHINode>(SplitIt))
+ ++SplitIt;
+ BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
+
+ // The new block lives in whichever loop the old one did. This preserves
+ // LCSSA as well, because we force the split point to be after any PHI nodes.
+ if (LoopInfo* LI = P->getAnalysisIfAvailable<LoopInfo>())
+ if (Loop *L = LI->getLoopFor(Old))
+ L->addBasicBlockToLoop(New, LI->getBase());
+
+ if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ // Old dominates New. New node domiantes all other nodes dominated by Old.
+ DomTreeNode *OldNode = DT->getNode(Old);
+ std::vector<DomTreeNode *> Children;
+ for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
+ I != E; ++I)
+ Children.push_back(*I);
+
+ DomTreeNode *NewNode = DT->addNewBlock(New,Old);
+ for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ DT->changeImmediateDominator(*I, NewNode);
+ }
+
+ if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>())
+ DF->splitBlock(Old);
+
+ return New;
+}
+
+
+/// SplitBlockPredecessors - This method transforms BB by introducing a new
+/// basic block into the function, and moving some of the predecessors of BB to
+/// be predecessors of the new block. The new predecessors are indicated by the
+/// Preds array, which has NumPreds elements in it. The new block is given a
+/// suffix of 'Suffix'.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses.
+/// In particular, it does not preserve LoopSimplify (because it's
+/// complicated to handle the case where one of the edges being split
+/// is an exit of a loop with other exits).
+///
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+ BasicBlock *const *Preds,
+ unsigned NumPreds, const char *Suffix,
+ Pass *P) {
+ // Create new basic block, insert right before the original block.
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
+ BB->getParent(), BB);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI = BranchInst::Create(BB, NewBB);
+
+ LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0;
+ Loop *L = LI ? LI->getLoopFor(BB) : 0;
+ bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
+
+ // Move the edges from Preds to point to NewBB instead of BB.
+ // While here, if we need to preserve loop analyses, collect
+ // some information about how this split will affect loops.
+ bool HasLoopExit = false;
+ bool IsLoopEntry = !!L;
+ bool SplitMakesNewLoopHeader = false;
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+
+ Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
+
+ if (LI) {
+ // If we need to preserve LCSSA, determine if any of
+ // the preds is a loop exit.
+ if (PreserveLCSSA)
+ if (Loop *PL = LI->getLoopFor(Preds[i]))
+ if (!PL->contains(BB))
+ HasLoopExit = true;
+ // If we need to preserve LoopInfo, note whether any of the
+ // preds crosses an interesting loop boundary.
+ if (L) {
+ if (L->contains(Preds[i]))
+ IsLoopEntry = false;
+ else
+ SplitMakesNewLoopHeader = true;
+ }
+ }
+ }
+
+ // Update dominator tree and dominator frontier if available.
+ DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
+ if (DT)
+ DT->splitBlock(NewBB);
+ if (DominanceFrontier *DF =
+ P ? P->getAnalysisIfAvailable<DominanceFrontier>() : 0)
+ DF->splitBlock(NewBB);
+
+ // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
+ // node becomes an incoming value for BB's phi node. However, if the Preds
+ // list is empty, we need to insert dummy entries into the PHI nodes in BB to
+ // account for the newly created predecessor.
+ if (NumPreds == 0) {
+ // Insert dummy values as the incoming value.
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
+ cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
+ return NewBB;
+ }
+
+ AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
+
+ if (L) {
+ if (IsLoopEntry) {
+ // Add the new block to the nearest enclosing loop (and not an
+ // adjacent loop). To find this, examine each of the predecessors and
+ // determine which loops enclose them, and select the most-nested loop
+ // which contains the loop containing the block being split.
+ Loop *InnermostPredLoop = 0;
+ for (unsigned i = 0; i != NumPreds; ++i)
+ if (Loop *PredLoop = LI->getLoopFor(Preds[i])) {
+ // Seek a loop which actually contains the block being split (to
+ // avoid adjacent loops).
+ while (PredLoop && !PredLoop->contains(BB))
+ PredLoop = PredLoop->getParentLoop();
+ // Select the most-nested of these loops which contains the block.
+ if (PredLoop &&
+ PredLoop->contains(BB) &&
+ (!InnermostPredLoop ||
+ InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
+ InnermostPredLoop = PredLoop;
+ }
+ if (InnermostPredLoop)
+ InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else {
+ L->addBasicBlockToLoop(NewBB, LI->getBase());
+ if (SplitMakesNewLoopHeader)
+ L->moveToHeader(NewBB);
+ }
+ }
+
+ // Otherwise, create a new PHI node in NewBB for each PHI node in BB.
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I++);
+
+ // Check to see if all of the values coming in are the same. If so, we
+ // don't need to create a new PHI node, unless it's needed for LCSSA.
+ Value *InVal = 0;
+ if (!HasLoopExit) {
+ InVal = PN->getIncomingValueForBlock(Preds[0]);
+ for (unsigned i = 1; i != NumPreds; ++i)
+ if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
+ InVal = 0;
+ break;
+ }
+ }
+
+ if (InVal) {
+ // If all incoming values for the new PHI would be the same, just don't
+ // make a new PHI. Instead, just remove the incoming values from the old
+ // PHI.
+ for (unsigned i = 0; i != NumPreds; ++i)
+ PN->removeIncomingValue(Preds[i], false);
+ } else {
+ // If the values coming into the block are not the same, we need a PHI.
+ // Create the new PHI node, insert it into NewBB at the end of the block
+ PHINode *NewPHI =
+ PHINode::Create(PN->getType(), PN->getName()+".ph", BI);
+ if (AA) AA->copyValue(PN, NewPHI);
+
+ // Move all of the PHI values for 'Preds' to the new PHI.
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ Value *V = PN->removeIncomingValue(Preds[i], false);
+ NewPHI->addIncoming(V, Preds[i]);
+ }
+ InVal = NewPHI;
+ }
+
+ // Add an incoming value to the PHI node in the loop for the preheader
+ // edge.
+ PN->addIncoming(InVal, NewBB);
+ }
+
+ return NewBB;
+}
+
+/// FindFunctionBackedges - Analyze the specified function to find all of the
+/// loop backedges in the function and return them. This is a relatively cheap
+/// (compared to computing dominators and loop info) analysis.
+///
+/// The output is added to Result, as pairs of <from,to> edge info.
+void llvm::FindFunctionBackedges(const Function &F,
+ SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
+ const BasicBlock *BB = &F.getEntryBlock();
+ if (succ_begin(BB) == succ_end(BB))
+ return;
+
+ SmallPtrSet<const BasicBlock*, 8> Visited;
+ SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
+ SmallPtrSet<const BasicBlock*, 8> InStack;
+
+ Visited.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ InStack.insert(BB);
+ do {
+ std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
+ const BasicBlock *ParentBB = Top.first;
+ succ_const_iterator &I = Top.second;
+
+ bool FoundNew = false;
+ while (I != succ_end(ParentBB)) {
+ BB = *I++;
+ if (Visited.insert(BB)) {
+ FoundNew = true;
+ break;
+ }
+ // Successor is in VisitStack, it's a back edge.
+ if (InStack.count(BB))
+ Result.push_back(std::make_pair(ParentBB, BB));
+ }
+
+ if (FoundNew) {
+ // Go down one level if there is a unvisited successor.
+ InStack.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ } else {
+ // Go up one level.
+ InStack.erase(VisitStack.pop_back_val().first);
+ }
+ } while (!VisitStack.empty());
+
+
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp b/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp
new file mode 100644
index 0000000..23a30cc5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp
@@ -0,0 +1,182 @@
+//===- BasicInliner.cpp - Basic function level inliner --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a simple function based inliner that does not use
+// call graph information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "basicinliner"
+#include "llvm/Module.h"
+#include "llvm/Function.h"
+#include "llvm/Transforms/Utils/BasicInliner.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+BasicInlineThreshold("basic-inline-threshold", cl::Hidden, cl::init(200),
+ cl::desc("Control the amount of basic inlining to perform (default = 200)"));
+
+namespace llvm {
+
+ /// BasicInlinerImpl - BasicInliner implemantation class. This hides
+ /// container info, used by basic inliner, from public interface.
+ struct BasicInlinerImpl {
+
+ BasicInlinerImpl(const BasicInlinerImpl&); // DO NOT IMPLEMENT
+ void operator=(const BasicInlinerImpl&); // DO NO IMPLEMENT
+ public:
+ BasicInlinerImpl(TargetData *T) : TD(T) {}
+
+ /// addFunction - Add function into the list of functions to process.
+ /// All functions must be inserted using this interface before invoking
+ /// inlineFunctions().
+ void addFunction(Function *F) {
+ Functions.push_back(F);
+ }
+
+ /// neverInlineFunction - Sometimes a function is never to be inlined
+ /// because of one or other reason.
+ void neverInlineFunction(Function *F) {
+ NeverInline.insert(F);
+ }
+
+ /// inlineFuctions - Walk all call sites in all functions supplied by
+ /// client. Inline as many call sites as possible. Delete completely
+ /// inlined functions.
+ void inlineFunctions();
+
+ private:
+ TargetData *TD;
+ std::vector<Function *> Functions;
+ SmallPtrSet<const Function *, 16> NeverInline;
+ SmallPtrSet<Function *, 8> DeadFunctions;
+ InlineCostAnalyzer CA;
+ };
+
+/// inlineFuctions - Walk all call sites in all functions supplied by
+/// client. Inline as many call sites as possible. Delete completely
+/// inlined functions.
+void BasicInlinerImpl::inlineFunctions() {
+
+ // Scan through and identify all call sites ahead of time so that we only
+ // inline call sites in the original functions, not call sites that result
+ // from inlining other functions.
+ std::vector<CallSite> CallSites;
+
+ for (std::vector<Function *>::iterator FI = Functions.begin(),
+ FE = Functions.end(); FI != FE; ++FI) {
+ Function *F = *FI;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ CallSite CS(cast<Value>(I));
+ if (CS && CS.getCalledFunction()
+ && !CS.getCalledFunction()->isDeclaration())
+ CallSites.push_back(CS);
+ }
+ }
+
+ DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
+
+ // Inline call sites.
+ bool Changed = false;
+ do {
+ Changed = false;
+ for (unsigned index = 0; index != CallSites.size() && !CallSites.empty();
+ ++index) {
+ CallSite CS = CallSites[index];
+ if (Function *Callee = CS.getCalledFunction()) {
+
+ // Eliminate calls that are never inlinable.
+ if (Callee->isDeclaration() ||
+ CS.getInstruction()->getParent()->getParent() == Callee) {
+ CallSites.erase(CallSites.begin() + index);
+ --index;
+ continue;
+ }
+ InlineCost IC = CA.getInlineCost(CS, NeverInline);
+ if (IC.isAlways()) {
+ DEBUG(dbgs() << " Inlining: cost=always"
+ <<", call: " << *CS.getInstruction());
+ } else if (IC.isNever()) {
+ DEBUG(dbgs() << " NOT Inlining: cost=never"
+ <<", call: " << *CS.getInstruction());
+ continue;
+ } else {
+ int Cost = IC.getValue();
+
+ if (Cost >= (int) BasicInlineThreshold) {
+ DEBUG(dbgs() << " NOT Inlining: cost = " << Cost
+ << ", call: " << *CS.getInstruction());
+ continue;
+ } else {
+ DEBUG(dbgs() << " Inlining: cost = " << Cost
+ << ", call: " << *CS.getInstruction());
+ }
+ }
+
+ // Inline
+ InlineFunctionInfo IFI(0, TD);
+ if (InlineFunction(CS, IFI)) {
+ if (Callee->use_empty() && (Callee->hasLocalLinkage() ||
+ Callee->hasAvailableExternallyLinkage()))
+ DeadFunctions.insert(Callee);
+ Changed = true;
+ CallSites.erase(CallSites.begin() + index);
+ --index;
+ }
+ }
+ }
+ } while (Changed);
+
+ // Remove completely inlined functions from module.
+ for(SmallPtrSet<Function *, 8>::iterator I = DeadFunctions.begin(),
+ E = DeadFunctions.end(); I != E; ++I) {
+ Function *D = *I;
+ Module *M = D->getParent();
+ M->getFunctionList().remove(D);
+ }
+}
+
+BasicInliner::BasicInliner(TargetData *TD) {
+ Impl = new BasicInlinerImpl(TD);
+}
+
+BasicInliner::~BasicInliner() {
+ delete Impl;
+}
+
+/// addFunction - Add function into the list of functions to process.
+/// All functions must be inserted using this interface before invoking
+/// inlineFunctions().
+void BasicInliner::addFunction(Function *F) {
+ Impl->addFunction(F);
+}
+
+/// neverInlineFunction - Sometimes a function is never to be inlined because
+/// of one or other reason.
+void BasicInliner::neverInlineFunction(Function *F) {
+ Impl->neverInlineFunction(F);
+}
+
+/// inlineFuctions - Walk all call sites in all functions supplied by
+/// client. Inline as many call sites as possible. Delete completely
+/// inlined functions.
+void BasicInliner::inlineFunctions() {
+ Impl->inlineFunctions();
+}
+
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
new file mode 100644
index 0000000..f75ffe6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -0,0 +1,443 @@
+//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BreakCriticalEdges pass - Break all of the critical edges in the CFG by
+// inserting a dummy basic block. This pass may be "required" by passes that
+// cannot deal with critical edges. For this usage, the structure type is
+// forward declared. This pass obviously invalidates the CFG, but can update
+// forward dominator (set, immediate dominators, tree, and frontier)
+// information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "break-crit-edges"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumBroken, "Number of blocks inserted");
+
+namespace {
+ struct BreakCriticalEdges : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BreakCriticalEdges() : FunctionPass(ID) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreserved<ProfileInfo>();
+
+ // No loop canonicalization guarantees are broken by this pass.
+ AU.addPreservedID(LoopSimplifyID);
+ }
+ };
+}
+
+char BreakCriticalEdges::ID = 0;
+INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
+ "Break critical edges in CFG", false, false);
+
+// Publically exposed interface to pass...
+char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
+FunctionPass *llvm::createBreakCriticalEdgesPass() {
+ return new BreakCriticalEdges();
+}
+
+// runOnFunction - Loop over all of the edges in the CFG, breaking critical
+// edges as they are found.
+//
+bool BreakCriticalEdges::runOnFunction(Function &F) {
+ bool Changed = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (SplitCriticalEdge(TI, i, this)) {
+ ++NumBroken;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Implementation of the external critical edge manipulation functions
+//===----------------------------------------------------------------------===//
+
+// isCriticalEdge - Return true if the specified edge is a critical edge.
+// Critical edges are edges from a block with multiple successors to a block
+// with multiple predecessors.
+//
+bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+ bool AllowIdenticalEdges) {
+ assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
+ if (TI->getNumSuccessors() == 1) return false;
+
+ const BasicBlock *Dest = TI->getSuccessor(SuccNum);
+ const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest);
+
+ // If there is more than one predecessor, this is a critical edge...
+ assert(I != E && "No preds, but we have an edge to the block?");
+ const BasicBlock *FirstPred = *I;
+ ++I; // Skip one edge due to the incoming arc from TI.
+ if (!AllowIdenticalEdges)
+ return I != E;
+
+ // If AllowIdenticalEdges is true, then we allow this edge to be considered
+ // non-critical iff all preds come from TI's block.
+ while (I != E) {
+ const BasicBlock *P = *I;
+ if (P != FirstPred)
+ return true;
+ // Note: leave this as is until no one ever compiles with either gcc 4.0.1
+ // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207
+ E = pred_end(P);
+ ++I;
+ }
+ return false;
+}
+
+/// CreatePHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
+/// may require new PHIs in the new exit block. This function inserts the
+/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB
+/// is the new loop exit block, and DestBB is the old loop exit, now the
+/// successor of SplitBB.
+static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
+ BasicBlock *SplitBB,
+ BasicBlock *DestBB) {
+ // SplitBB shouldn't have anything non-trivial in it yet.
+ assert(SplitBB->getFirstNonPHI() == SplitBB->getTerminator() &&
+ "SplitBB has non-PHI nodes!");
+
+ // For each PHI in the destination block...
+ for (BasicBlock::iterator I = DestBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ unsigned Idx = PN->getBasicBlockIndex(SplitBB);
+ Value *V = PN->getIncomingValue(Idx);
+ // If the input is a PHI which already satisfies LCSSA, don't create
+ // a new one.
+ if (const PHINode *VP = dyn_cast<PHINode>(V))
+ if (VP->getParent() == SplitBB)
+ continue;
+ // Otherwise a new PHI is needed. Create one and populate it.
+ PHINode *NewPN = PHINode::Create(PN->getType(), "split",
+ SplitBB->getTerminator());
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i)
+ NewPN->addIncoming(V, Preds[i]);
+ // Update the original PHI.
+ PN->setIncomingValue(Idx, NewPN);
+ }
+}
+
+/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
+/// split the critical edge. This will update DominatorTree and
+/// DominatorFrontier information if it is available, thus calling this pass
+/// will not invalidate either of them. This returns the new block if the edge
+/// was split, null otherwise.
+///
+/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
+/// specified successor will be merged into the same critical edge block.
+/// This is most commonly interesting with switch instructions, which may
+/// have many edges to any one destination. This ensures that all edges to that
+/// dest go to one block instead of each going to a different block, but isn't
+/// the standard definition of a "critical edge".
+///
+/// It is invalid to call this function on a critical edge that starts at an
+/// IndirectBrInst. Splitting these edges will almost always create an invalid
+/// program because the address of the new block won't be the one that is jumped
+/// to.
+///
+BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+ Pass *P, bool MergeIdenticalEdges) {
+ if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
+
+ assert(!isa<IndirectBrInst>(TI) &&
+ "Cannot split critical edge from IndirectBrInst");
+
+ BasicBlock *TIBB = TI->getParent();
+ BasicBlock *DestBB = TI->getSuccessor(SuccNum);
+
+ // Create a new basic block, linking it into the CFG.
+ BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
+ TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
+ // Create our unconditional branch.
+ BranchInst::Create(DestBB, NewBB);
+
+ // Branch to the new block, breaking the edge.
+ TI->setSuccessor(SuccNum, NewBB);
+
+ // Insert the block into the function... right after the block TI lives in.
+ Function &F = *TIBB->getParent();
+ Function::iterator FBBI = TIBB;
+ F.getBasicBlockList().insert(++FBBI, NewBB);
+
+ // If there are any PHI nodes in DestBB, we need to update them so that they
+ // merge incoming values from NewBB instead of from TIBB.
+ if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) {
+ // This conceptually does:
+ // foreach (PHINode *PN in DestBB)
+ // PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB);
+ // but is optimized for two cases.
+
+ if (APHI->getNumIncomingValues() <= 8) { // Small # preds case.
+ unsigned BBIdx = 0;
+ for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+ // We no longer enter through TIBB, now we come in through NewBB.
+ // Revector exactly one entry in the PHI node that used to come from
+ // TIBB to come from NewBB.
+ PHINode *PN = cast<PHINode>(I);
+
+ // Reuse the previous value of BBIdx if it lines up. In cases where we
+ // have multiple phi nodes with *lots* of predecessors, this is a speed
+ // win because we don't have to scan the PHI looking for TIBB. This
+ // happens because the BB list of PHI nodes are usually in the same
+ // order.
+ if (PN->getIncomingBlock(BBIdx) != TIBB)
+ BBIdx = PN->getBasicBlockIndex(TIBB);
+ PN->setIncomingBlock(BBIdx, NewBB);
+ }
+ } else {
+ // However, the foreach loop is slow for blocks with lots of predecessors
+ // because PHINode::getIncomingBlock is O(n) in # preds. Instead, walk
+ // the user list of TIBB to find the PHI nodes.
+ SmallPtrSet<PHINode*, 16> UpdatedPHIs;
+
+ for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end();
+ UI != E; ) {
+ Value::use_iterator Use = UI++;
+ if (PHINode *PN = dyn_cast<PHINode>(*Use)) {
+ // Remove one entry from each PHI.
+ if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN))
+ PN->setOperand(Use.getOperandNo(), NewBB);
+ }
+ }
+ }
+ }
+
+ // If there are any other edges from TIBB to DestBB, update those to go
+ // through the split block, making those edges non-critical as well (and
+ // reducing the number of phi entries in the DestBB if relevant).
+ if (MergeIdenticalEdges) {
+ for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
+ if (TI->getSuccessor(i) != DestBB) continue;
+
+ // Remove an entry for TIBB from DestBB phi nodes.
+ DestBB->removePredecessor(TIBB);
+
+ // We found another edge to DestBB, go to NewBB instead.
+ TI->setSuccessor(i, NewBB);
+ }
+ }
+
+
+
+ // If we don't have a pass object, we can't update anything...
+ if (P == 0) return NewBB;
+
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>();
+ LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
+ ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+
+ // If we have nothing to update, just return.
+ if (DT == 0 && DF == 0 && LI == 0 && PI == 0)
+ return NewBB;
+
+ // Now update analysis information. Since the only predecessor of NewBB is
+ // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate
+ // anything, as there are other successors of DestBB. However, if all other
+ // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a
+ // loop header) then NewBB dominates DestBB.
+ SmallVector<BasicBlock*, 8> OtherPreds;
+
+ // If there is a PHI in the block, loop over predecessors with it, which is
+ // faster than iterating pred_begin/end.
+ if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) != NewBB)
+ OtherPreds.push_back(PN->getIncomingBlock(i));
+ } else {
+ for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
+ I != E; ++I) {
+ BasicBlock *P = *I;
+ if (P != NewBB)
+ OtherPreds.push_back(P);
+ }
+ }
+
+ bool NewBBDominatesDestBB = true;
+
+ // Should we update DominatorTree information?
+ if (DT) {
+ DomTreeNode *TINode = DT->getNode(TIBB);
+
+ // The new block is not the immediate dominator for any other nodes, but
+ // TINode is the immediate dominator for the new node.
+ //
+ if (TINode) { // Don't break unreachable code!
+ DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
+ DomTreeNode *DestBBNode = 0;
+
+ // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
+ if (!OtherPreds.empty()) {
+ DestBBNode = DT->getNode(DestBB);
+ while (!OtherPreds.empty() && NewBBDominatesDestBB) {
+ if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
+ NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
+ OtherPreds.pop_back();
+ }
+ OtherPreds.clear();
+ }
+
+ // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
+ // doesn't dominate anything.
+ if (NewBBDominatesDestBB) {
+ if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
+ DT->changeImmediateDominator(DestBBNode, NewBBNode);
+ }
+ }
+ }
+
+ // Should we update DominanceFrontier information?
+ if (DF) {
+ // If NewBBDominatesDestBB hasn't been computed yet, do so with DF.
+ if (!OtherPreds.empty()) {
+ // FIXME: IMPLEMENT THIS!
+ llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset."
+ " not implemented yet!");
+ }
+
+ // Since the new block is dominated by its only predecessor TIBB,
+ // it cannot be in any block's dominance frontier. If NewBB dominates
+ // DestBB, its dominance frontier is the same as DestBB's, otherwise it is
+ // just {DestBB}.
+ DominanceFrontier::DomSetType NewDFSet;
+ if (NewBBDominatesDestBB) {
+ DominanceFrontier::iterator I = DF->find(DestBB);
+ if (I != DF->end()) {
+ DF->addBasicBlock(NewBB, I->second);
+
+ if (I->second.count(DestBB)) {
+ // However NewBB's frontier does not include DestBB.
+ DominanceFrontier::iterator NF = DF->find(NewBB);
+ DF->removeFromFrontier(NF, DestBB);
+ }
+ }
+ else
+ DF->addBasicBlock(NewBB, DominanceFrontier::DomSetType());
+ } else {
+ DominanceFrontier::DomSetType NewDFSet;
+ NewDFSet.insert(DestBB);
+ DF->addBasicBlock(NewBB, NewDFSet);
+ }
+ }
+
+ // Update LoopInfo if it is around.
+ if (LI) {
+ if (Loop *TIL = LI->getLoopFor(TIBB)) {
+ // If one or the other blocks were not in a loop, the new block is not
+ // either, and thus LI doesn't need to be updated.
+ if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
+ if (TIL == DestLoop) {
+ // Both in the same loop, the NewBB joins loop.
+ DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == DestBB &&
+ "Should not create irreducible loops!");
+ if (Loop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NewBB, LI->getBase());
+ }
+ }
+ // If TIBB is in a loop and DestBB is outside of that loop, split the
+ // other exit blocks of the loop that also have predecessors outside
+ // the loop, to maintain a LoopSimplify guarantee.
+ if (!TIL->contains(DestBB) &&
+ P->mustPreserveAnalysisID(LoopSimplifyID)) {
+ assert(!TIL->contains(NewBB) &&
+ "Split point for loop exit is contained in loop!");
+
+ // Update LCSSA form in the newly created exit block.
+ if (P->mustPreserveAnalysisID(LCSSAID)) {
+ SmallVector<BasicBlock *, 1> OrigPred;
+ OrigPred.push_back(TIBB);
+ CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB);
+ }
+
+ // For each unique exit block...
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ TIL->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ // Collect all the preds that are inside the loop, and note
+ // whether there are any preds outside the loop.
+ SmallVector<BasicBlock *, 4> Preds;
+ bool HasPredOutsideOfLoop = false;
+ BasicBlock *Exit = ExitBlocks[i];
+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
+ I != E; ++I) {
+ BasicBlock *P = *I;
+ if (TIL->contains(P))
+ Preds.push_back(P);
+ else
+ HasPredOutsideOfLoop = true;
+ }
+ // If there are any preds not in the loop, we'll need to split
+ // the edges. The Preds.empty() check is needed because a block
+ // may appear multiple times in the list. We can't use
+ // getUniqueExitBlocks above because that depends on LoopSimplify
+ // form, which we're in the process of restoring!
+ if (!Preds.empty() && HasPredOutsideOfLoop) {
+ BasicBlock *NewExitBB =
+ SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
+ "split", P);
+ if (P->mustPreserveAnalysisID(LCSSAID))
+ CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
+ }
+ }
+ }
+ // LCSSA form was updated above for the case where LoopSimplify is
+ // available, which means that all predecessors of loop exit blocks
+ // are within the loop. Without LoopSimplify form, it would be
+ // necessary to insert a new phi.
+ assert((!P->mustPreserveAnalysisID(LCSSAID) ||
+ P->mustPreserveAnalysisID(LoopSimplifyID)) &&
+ "SplitCriticalEdge doesn't know how to update LCCSA form "
+ "without LoopSimplify!");
+ }
+ }
+
+ // Update ProfileInfo if it is around.
+ if (PI)
+ PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges);
+
+ return NewBB;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
new file mode 100644
index 0000000..c313949
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -0,0 +1,527 @@
+//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some functions that will create standard C libcalls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Type.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Intrinsics.h"
+
+using namespace llvm;
+
+/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
+Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
+ return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr");
+}
+
+/// EmitStrLen - Emit a call to the strlen function to the builder, for the
+/// specified pointer. This always returns an integer value of size intptr_t.
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ NULL);
+ CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitStrChr - Emit a call to the strchr function to the builder, for the
+/// specified pointer and character. Ptr is required to be some pointer type,
+/// and the return value has 'i8*' type.
+Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+ const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI =
+ AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+
+ const Type *I8Ptr = B.getInt8PtrTy();
+ const Type *I32Ty = B.getInt32Ty();
+ Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1),
+ I8Ptr, I8Ptr, I32Ty, NULL);
+ CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
+ ConstantInt::get(I32Ty, C), "strchr");
+ if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitStrNCmp - Emit a call to the strncmp function to the builder.
+Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
+ IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI, 3),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context), NULL);
+ CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B),
+ CastToCStr(Ptr2, B), Len, "strncmp");
+
+ if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
+/// specified pointer arguments.
+Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+ const TargetData *TD, StringRef Name) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ const Type *I8Ptr = B.getInt8PtrTy();
+ Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2),
+ I8Ptr, I8Ptr, I8Ptr, NULL);
+ CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
+ Name);
+ if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
+/// specified pointer arguments.
+Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
+ IRBuilder<> &B, const TargetData *TD, StringRef Name) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ const Type *I8Ptr = B.getInt8PtrTy();
+ Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2),
+ I8Ptr, I8Ptr, I8Ptr,
+ Len->getType(), NULL);
+ CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
+ Len, "strncpy");
+ if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+
+/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
+/// expects that Len has type 'intptr_t' and Dst/Src are pointers.
+Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align,
+ bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Dst = CastToCStr(Dst, B);
+ Src = CastToCStr(Src, B);
+ const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() };
+ Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3);
+ return B.CreateCall5(MemCpy, Dst, Src, Len,
+ ConstantInt::get(B.getInt32Ty(), Align),
+ ConstantInt::get(B.getInt1Ty(), isVolatile));
+}
+
+/// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
+/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
+/// are pointers.
+Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
+ IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI;
+ AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
+ AttrListPtr::get(&AWI, 1),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context), NULL);
+ Dst = CastToCStr(Dst, B);
+ Src = CastToCStr(Src, B);
+ CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize);
+ if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitMemMove - Emit a call to the memmove function to the builder. This
+/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
+Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len, unsigned Align,
+ bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ const Type *ArgTys[3] = { Dst->getType(), Src->getType(),
+ TD->getIntPtrType(Context) };
+ Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, ArgTys, 3);
+ Dst = CastToCStr(Dst, B);
+ Src = CastToCStr(Src, B);
+ Value *A = ConstantInt::get(B.getInt32Ty(), Align);
+ Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile);
+ return B.CreateCall5(MemMove, Dst, Src, Len, A, Vol);
+}
+
+/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
+/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
+Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
+ Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI;
+ AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ B.getInt32Ty(),
+ TD->getIntPtrType(Context),
+ NULL);
+ CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+
+ if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitMemCmp - Emit a call to the memcmp function.
+Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
+ Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context), NULL);
+ CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
+ Len, "memcmp");
+
+ if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitMemSet - Emit a call to the memset function
+Value *llvm::EmitMemSet(Value *Dst, Value *Val, Value *Len, bool isVolatile,
+ IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Intrinsic::ID IID = Intrinsic::memset;
+ const Type *Tys[2] = { Dst->getType(), Len->getType() };
+ Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 2);
+ Value *Align = ConstantInt::get(B.getInt32Ty(), 1);
+ Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile);
+ return B.CreateCall5(MemSet, CastToCStr(Dst, B), Val, Len, Align, Vol);
+}
+
+/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
+/// 'floor'). This function is known to take a single of type matching 'Op' and
+/// returns one value with the same type. If 'Op' is a long double, 'l' is
+/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.
+Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name,
+ IRBuilder<> &B, const AttrListPtr &Attrs) {
+ char NameBuffer[20];
+ if (!Op->getType()->isDoubleTy()) {
+ // If we need to add a suffix, copy into NameBuffer.
+ unsigned NameLen = strlen(Name);
+ assert(NameLen < sizeof(NameBuffer)-2);
+ memcpy(NameBuffer, Name, NameLen);
+ if (Op->getType()->isFloatTy())
+ NameBuffer[NameLen] = 'f'; // floorf
+ else
+ NameBuffer[NameLen] = 'l'; // floorl
+ NameBuffer[NameLen+1] = 0;
+ Name = NameBuffer;
+ }
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(), NULL);
+ CallInst *CI = B.CreateCall(Callee, Op, Name);
+ CI->setAttributes(Attrs);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
+/// is an integer.
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
+ B.getInt32Ty(), NULL);
+ CallInst *CI = B.CreateCall(PutChar,
+ B.CreateIntCast(Char,
+ B.getInt32Ty(),
+ /*isSigned*/true,
+ "chari"),
+ "putchar");
+
+ if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitPutS - Emit a call to the puts function. This assumes that Str is
+/// some pointer.
+void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+
+ Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ NULL);
+ CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
+ if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+}
+
+/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
+/// an integer and File is a pointer to FILE.
+void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+ const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2),
+ B.getInt32Ty(),
+ B.getInt32Ty(), File->getType(),
+ NULL);
+ else
+ F = M->getOrInsertFunction("fputc",
+ B.getInt32Ty(),
+ B.getInt32Ty(),
+ File->getType(), NULL);
+ Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
+ "chari");
+ CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFPutS - Emit a call to the puts function. Str is required to be a
+/// pointer and File is a pointer to FILE.
+void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+ const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ File->getType(), NULL);
+ else
+ F = M->getOrInsertFunction("fputs", B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ File->getType(), NULL);
+ CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
+/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
+void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
+ IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context),
+ File->getType(), NULL);
+ else
+ F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context),
+ File->getType(), NULL);
+ CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
+ ConstantInt::get(TD->getIntPtrType(Context), 1), File);
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+}
+
+SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { }
+
+bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
+ // We really need TargetData for later.
+ if (!TD) return false;
+
+ this->CI = CI;
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ const FunctionType *FT = Callee->getFunctionType();
+ BasicBlock *BB = CI->getParent();
+ LLVMContext &Context = CI->getParent()->getContext();
+ IRBuilder<> B(Context);
+
+ // Set the builder to the instruction after the call.
+ B.SetInsertPoint(BB, CI);
+
+ if (Name == "__memcpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1, false, B, TD);
+ replaceCall(CI->getArgOperand(0));
+ return true;
+ }
+ return false;
+ }
+
+ // Should be similar to memcpy.
+ if (Name == "__mempcpy_chk") {
+ return false;
+ }
+
+ if (Name == "__memmove_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1, false, B, TD);
+ replaceCall(CI->getArgOperand(0));
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__memset_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != TD->getIntPtrType(Context) ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
+ false);
+ EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2),
+ false, B, TD);
+ replaceCall(CI->getArgOperand(0));
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 3 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ FT->getParamType(2) != TD->getIntPtrType(Context))
+ return 0;
+
+
+ // If a) we don't have any length information, or b) we know this will
+ // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
+ // st[rp]cpy_chk call which may fail at runtime if the size is too long.
+ // TODO: It might be nice to get a maximum length out of the possible
+ // string lengths for varying.
+ if (isFoldable(2, 1, true)) {
+ Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
+ Name.substr(2, 6));
+ replaceCall(Ret);
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") {
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ FT->getParamType(3) != TD->getIntPtrType(Context))
+ return false;
+
+ if (isFoldable(3, 2, false)) {
+ Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TD, Name.substr(2, 7));
+ replaceCall(Ret);
+ return true;
+ }
+ return false;
+ }
+
+ if (Name == "__strcat_chk") {
+ return false;
+ }
+
+ if (Name == "__strncat_chk") {
+ return false;
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CMakeLists.txt b/contrib/llvm/lib/Transforms/Utils/CMakeLists.txt
new file mode 100644
index 0000000..61cbeb2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -0,0 +1,28 @@
+add_llvm_library(LLVMTransformUtils
+ AddrModeMatcher.cpp
+ BasicBlockUtils.cpp
+ BasicInliner.cpp
+ BreakCriticalEdges.cpp
+ BuildLibCalls.cpp
+ CloneFunction.cpp
+ CloneLoop.cpp
+ CloneModule.cpp
+ CodeExtractor.cpp
+ DemoteRegToStack.cpp
+ InlineFunction.cpp
+ InstructionNamer.cpp
+ LCSSA.cpp
+ Local.cpp
+ LoopSimplify.cpp
+ LoopUnroll.cpp
+ LowerInvoke.cpp
+ LowerSwitch.cpp
+ Mem2Reg.cpp
+ PromoteMemoryToRegister.cpp
+ SSAUpdater.cpp
+ SimplifyCFG.cpp
+ UnifyFunctionExitNodes.cpp
+ ValueMapper.cpp
+ )
+
+target_link_libraries (LLVMTransformUtils LLVMSupport)
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
new file mode 100644
index 0000000..f43186e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -0,0 +1,580 @@
+//===- CloneFunction.cpp - Clone a function into another function ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneFunctionInto interface, which is used as the
+// low-level function cloner. This is used by the CloneFunction and function
+// inliner to do the dirty work of copying the body of a function around.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include <map>
+using namespace llvm;
+
+// CloneBasicBlock - See comments in Cloning.h
+BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
+ ValueToValueMapTy &VMap,
+ const Twine &NameSuffix, Function *F,
+ ClonedCodeInfo *CodeInfo) {
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
+ if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+
+ // Loop over all instructions, and copy them over.
+ for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+ Instruction *NewInst = II->clone();
+ if (II->hasName())
+ NewInst->setName(II->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ VMap[II] = NewInst; // Add instruction map to value.
+
+ hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (isa<ConstantInt>(AI->getArraySize()))
+ hasStaticAllocas = true;
+ else
+ hasDynamicAllocas = true;
+ }
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsUnwinds |= isa<UnwindInst>(BB->getTerminator());
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ BB != &BB->getParent()->getEntryBlock();
+ }
+ return NewBB;
+}
+
+// Clone OldFunc into NewFunc, transforming the old arguments into references to
+// VMap values.
+//
+void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &Returns,
+ const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); I != E; ++I)
+ assert(VMap.count(I) && "No mapping from source argument specified!");
+#endif
+
+ // Clone any attributes.
+ if (NewFunc->arg_size() == OldFunc->arg_size())
+ NewFunc->copyAttributesFrom(OldFunc);
+ else {
+ //Some arguments were deleted with the VMap. Copy arguments one by one
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); I != E; ++I)
+ if (Argument* Anew = dyn_cast<Argument>(VMap[I]))
+ Anew->addAttr( OldFunc->getAttributes()
+ .getParamAttributes(I->getArgNo() + 1));
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttr(0, OldFunc->getAttributes()
+ .getRetAttributes()));
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttr(~0, OldFunc->getAttributes()
+ .getFnAttributes()));
+
+ }
+
+ // Loop over all of the basic blocks in the function, cloning them as
+ // appropriate. Note that we save BE this way in order to handle cloning of
+ // recursive functions into themselves.
+ //
+ for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+ BI != BE; ++BI) {
+ const BasicBlock &BB = *BI;
+
+ // Create a new basic block and copy instructions into it!
+ BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc,
+ CodeInfo);
+ VMap[&BB] = CBB; // Add basic block mapping.
+
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
+ Returns.push_back(RI);
+ }
+
+ // Loop over all of the instructions in the function, fixing up operand
+ // references as we go. This uses VMap to do all the hard work.
+ //
+ for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
+ BE = NewFunc->end(); BB != BE; ++BB)
+ // Loop over all instructions, fixing each one as we find it...
+ for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
+ RemapInstruction(II, VMap, ModuleLevelChanges);
+}
+
+/// CloneFunction - Return a copy of the specified function, but without
+/// embedding the function into another module. Also, any references specified
+/// in the VMap are changed to refer to their mapped value instead of the
+/// original one. If any of the arguments to the function are in the VMap,
+/// the arguments are deleted from the resultant function. The VMap is
+/// updated to include mappings from all of the instructions and basicblocks in
+/// the function from their old to new values.
+///
+Function *llvm::CloneFunction(const Function *F,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ ClonedCodeInfo *CodeInfo) {
+ std::vector<const Type*> ArgTypes;
+
+ // The user might be deleting arguments to the function by specifying them in
+ // the VMap. If so, we need to not add the arguments to the arg ty vector
+ //
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet?
+ ArgTypes.push_back(I->getType());
+
+ // Create a new function type...
+ FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(),
+ ArgTypes, F->getFunctionType()->isVarArg());
+
+ // Create the new function...
+ Function *NewF = Function::Create(FTy, F->getLinkage(), F->getName());
+
+ // Loop over the arguments, copying the names of the mapped arguments over...
+ Function::arg_iterator DestI = NewF->arg_begin();
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I)
+ if (VMap.count(I) == 0) { // Is this argument preserved?
+ DestI->setName(I->getName()); // Copy the name over...
+ VMap[I] = DestI++; // Add mapping to VMap
+ }
+
+ SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo);
+ return NewF;
+}
+
+
+
+namespace {
+ /// PruningFunctionCloner - This class is a private class used to implement
+ /// the CloneAndPruneFunctionInto method.
+ struct PruningFunctionCloner {
+ Function *NewFunc;
+ const Function *OldFunc;
+ ValueToValueMapTy &VMap;
+ bool ModuleLevelChanges;
+ SmallVectorImpl<ReturnInst*> &Returns;
+ const char *NameSuffix;
+ ClonedCodeInfo *CodeInfo;
+ const TargetData *TD;
+ public:
+ PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
+ ValueToValueMapTy &valueMap,
+ bool moduleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &returns,
+ const char *nameSuffix,
+ ClonedCodeInfo *codeInfo,
+ const TargetData *td)
+ : NewFunc(newFunc), OldFunc(oldFunc),
+ VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
+ Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
+ }
+
+ /// CloneBlock - The specified block is found to be reachable, clone it and
+ /// anything that it can reach.
+ void CloneBlock(const BasicBlock *BB,
+ std::vector<const BasicBlock*> &ToClone);
+
+ public:
+ /// ConstantFoldMappedInstruction - Constant fold the specified instruction,
+ /// mapping its operands through VMap if they are available.
+ Constant *ConstantFoldMappedInstruction(const Instruction *I);
+ };
+}
+
+/// CloneBlock - The specified block is found to be reachable, clone it and
+/// anything that it can reach.
+void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
+ std::vector<const BasicBlock*> &ToClone){
+ Value *&BBEntry = VMap[BB];
+
+ // Have we already cloned this block?
+ if (BBEntry) return;
+
+ // Nope, clone it now.
+ BasicBlock *NewBB;
+ BBEntry = NewBB = BasicBlock::Create(BB->getContext());
+ if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+
+ // Loop over all instructions, and copy them over, DCE'ing as we go. This
+ // loop doesn't include the terminator.
+ for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
+ II != IE; ++II) {
+ // If this instruction constant folds, don't bother cloning the instruction,
+ // instead, just add the constant to the value map.
+ if (Constant *C = ConstantFoldMappedInstruction(II)) {
+ VMap[II] = C;
+ continue;
+ }
+
+ Instruction *NewInst = II->clone();
+ if (II->hasName())
+ NewInst->setName(II->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ VMap[II] = NewInst; // Add instruction map to value.
+
+ hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (isa<ConstantInt>(AI->getArraySize()))
+ hasStaticAllocas = true;
+ else
+ hasDynamicAllocas = true;
+ }
+ }
+
+ // Finally, clone over the terminator.
+ const TerminatorInst *OldTI = BB->getTerminator();
+ bool TerminatorDone = false;
+ if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
+ if (BI->isConditional()) {
+ // If the condition was a known constant in the callee...
+ ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+ // Or is a known constant in the caller...
+ if (Cond == 0)
+ Cond = dyn_cast_or_null<ConstantInt>(VMap[BI->getCondition()]);
+
+ // Constant fold to uncond branch!
+ if (Cond) {
+ BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+ } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
+ // If switching on a value known constant in the caller.
+ ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
+ if (Cond == 0) // Or known constant after constant prop in the callee...
+ Cond = dyn_cast_or_null<ConstantInt>(VMap[SI->getCondition()]);
+ if (Cond) { // Constant fold to uncond branch!
+ BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+
+ if (!TerminatorDone) {
+ Instruction *NewInst = OldTI->clone();
+ if (OldTI->hasName())
+ NewInst->setName(OldTI->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ VMap[OldTI] = NewInst; // Add instruction map to value.
+
+ // Recursively clone any reachable successor blocks.
+ const TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ ToClone.push_back(TI->getSuccessor(i));
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsUnwinds |= isa<UnwindInst>(OldTI);
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ BB != &BB->getParent()->front();
+ }
+
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(NewBB->getTerminator()))
+ Returns.push_back(RI);
+}
+
+/// ConstantFoldMappedInstruction - Constant fold the specified instruction,
+/// mapping its operands through VMap if they are available.
+Constant *PruningFunctionCloner::
+ConstantFoldMappedInstruction(const Instruction *I) {
+ SmallVector<Constant*, 8> Ops;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
+ VMap, ModuleLevelChanges)))
+ Ops.push_back(Op);
+ else
+ return 0; // All operands not constant!
+
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
+ TD);
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
+ if (!LI->isVolatile() && CE->getOpcode() == Instruction::GetElementPtr)
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(),
+ CE);
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0],
+ Ops.size(), TD);
+}
+
+static DebugLoc
+UpdateInlinedAtInfo(const DebugLoc &InsnDL, const DebugLoc &TheCallDL,
+ LLVMContext &Ctx) {
+ DebugLoc NewLoc = TheCallDL;
+ if (MDNode *IA = InsnDL.getInlinedAt(Ctx))
+ NewLoc = UpdateInlinedAtInfo(DebugLoc::getFromDILocation(IA), TheCallDL,
+ Ctx);
+
+ return DebugLoc::get(InsnDL.getLine(), InsnDL.getCol(),
+ InsnDL.getScope(Ctx), NewLoc.getAsMDNode(Ctx));
+}
+
+/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
+/// except that it does some simple constant prop and DCE on the fly. The
+/// effect of this is to copy significantly less code in cases where (for
+/// example) a function call with constant arguments is inlined, and those
+/// constant arguments cause a significant amount of code in the callee to be
+/// dead. Since this doesn't produce an exact copy of the input, it can't be
+/// used for things like CloneFunction or CloneModule.
+void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &Returns,
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo,
+ const TargetData *TD,
+ Instruction *TheCall) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+ for (Function::const_arg_iterator II = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); II != E; ++II)
+ assert(VMap.count(II) && "No mapping from source argument specified!");
+#endif
+
+ PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
+ Returns, NameSuffix, CodeInfo, TD);
+
+ // Clone the entry block, and anything recursively reachable from it.
+ std::vector<const BasicBlock*> CloneWorklist;
+ CloneWorklist.push_back(&OldFunc->getEntryBlock());
+ while (!CloneWorklist.empty()) {
+ const BasicBlock *BB = CloneWorklist.back();
+ CloneWorklist.pop_back();
+ PFC.CloneBlock(BB, CloneWorklist);
+ }
+
+ // Loop over all of the basic blocks in the old function. If the block was
+ // reachable, we have cloned it and the old block is now in the value map:
+ // insert it into the new function in the right order. If not, ignore it.
+ //
+ // Defer PHI resolution until rest of function is resolved.
+ SmallVector<const PHINode*, 16> PHIToResolve;
+ for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+ BI != BE; ++BI) {
+ BasicBlock *NewBB = cast_or_null<BasicBlock>(VMap[BI]);
+ if (NewBB == 0) continue; // Dead block.
+
+ // Add the new block to the new function.
+ NewFunc->getBasicBlockList().push_back(NewBB);
+
+ // Loop over all of the instructions in the block, fixing up operand
+ // references as we go. This uses VMap to do all the hard work.
+ //
+ BasicBlock::iterator I = NewBB->begin();
+
+ DebugLoc TheCallDL;
+ if (TheCall)
+ TheCallDL = TheCall->getDebugLoc();
+
+ // Handle PHI nodes specially, as we have to remove references to dead
+ // blocks.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ // Skip over all PHI nodes, remembering them for later.
+ BasicBlock::const_iterator OldI = BI->begin();
+ for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) {
+ if (I->hasMetadata()) {
+ if (!TheCallDL.isUnknown()) {
+ DebugLoc IDL = I->getDebugLoc();
+ if (!IDL.isUnknown()) {
+ DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
+ I->getContext());
+ I->setDebugLoc(NewDL);
+ }
+ } else {
+ // The cloned instruction has dbg info but the call instruction
+ // does not have dbg info. Remove dbg info from cloned instruction.
+ I->setDebugLoc(DebugLoc());
+ }
+ }
+ PHIToResolve.push_back(cast<PHINode>(OldI));
+ }
+ }
+
+ // FIXME:
+ // FIXME:
+ // FIXME: Unclone all this metadata stuff.
+ // FIXME:
+ // FIXME:
+
+ // Otherwise, remap the rest of the instructions normally.
+ for (; I != NewBB->end(); ++I) {
+ if (I->hasMetadata()) {
+ if (!TheCallDL.isUnknown()) {
+ DebugLoc IDL = I->getDebugLoc();
+ if (!IDL.isUnknown()) {
+ DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
+ I->getContext());
+ I->setDebugLoc(NewDL);
+ }
+ } else {
+ // The cloned instruction has dbg info but the call instruction
+ // does not have dbg info. Remove dbg info from cloned instruction.
+ I->setDebugLoc(DebugLoc());
+ }
+ }
+ RemapInstruction(I, VMap, ModuleLevelChanges);
+ }
+ }
+
+ // Defer PHI resolution until rest of function is resolved, PHI resolution
+ // requires the CFG to be up-to-date.
+ for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
+ const PHINode *OPN = PHIToResolve[phino];
+ unsigned NumPreds = OPN->getNumIncomingValues();
+ const BasicBlock *OldBB = OPN->getParent();
+ BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);
+
+ // Map operands for blocks that are live and remove operands for blocks
+ // that are dead.
+ for (; phino != PHIToResolve.size() &&
+ PHIToResolve[phino]->getParent() == OldBB; ++phino) {
+ OPN = PHIToResolve[phino];
+ PHINode *PN = cast<PHINode>(VMap[OPN]);
+ for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
+ if (BasicBlock *MappedBlock =
+ cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) {
+ Value *InVal = MapValue(PN->getIncomingValue(pred),
+ VMap, ModuleLevelChanges);
+ assert(InVal && "Unknown input value?");
+ PN->setIncomingValue(pred, InVal);
+ PN->setIncomingBlock(pred, MappedBlock);
+ } else {
+ PN->removeIncomingValue(pred, false);
+ --pred, --e; // Revisit the next entry.
+ }
+ }
+ }
+
+ // The loop above has removed PHI entries for those blocks that are dead
+ // and has updated others. However, if a block is live (i.e. copied over)
+ // but its terminator has been changed to not go to this block, then our
+ // phi nodes will have invalid entries. Update the PHI nodes in this
+ // case.
+ PHINode *PN = cast<PHINode>(NewBB->begin());
+ NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
+ if (NumPreds != PN->getNumIncomingValues()) {
+ assert(NumPreds < PN->getNumIncomingValues());
+ // Count how many times each predecessor comes to this block.
+ std::map<BasicBlock*, unsigned> PredCount;
+ for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
+ PI != E; ++PI)
+ --PredCount[*PI];
+
+ // Figure out how many entries to remove from each PHI.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ ++PredCount[PN->getIncomingBlock(i)];
+
+ // At this point, the excess predecessor entries are positive in the
+ // map. Loop over all of the PHIs and remove excess predecessor
+ // entries.
+ BasicBlock::iterator I = NewBB->begin();
+ for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+ for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(),
+ E = PredCount.end(); PCI != E; ++PCI) {
+ BasicBlock *Pred = PCI->first;
+ for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
+ PN->removeIncomingValue(Pred, false);
+ }
+ }
+ }
+
+ // If the loops above have made these phi nodes have 0 or 1 operand,
+ // replace them with undef or the input value. We must do this for
+ // correctness, because 0-operand phis are not valid.
+ PN = cast<PHINode>(NewBB->begin());
+ if (PN->getNumIncomingValues() == 0) {
+ BasicBlock::iterator I = NewBB->begin();
+ BasicBlock::const_iterator OldI = OldBB->begin();
+ while ((PN = dyn_cast<PHINode>(I++))) {
+ Value *NV = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NV);
+ assert(VMap[OldI] == PN && "VMap mismatch");
+ VMap[OldI] = NV;
+ PN->eraseFromParent();
+ ++OldI;
+ }
+ }
+ // NOTE: We cannot eliminate single entry phi nodes here, because of
+ // VMap. Single entry phi nodes can have multiple VMap entries
+ // pointing at them. Thus, deleting one would require scanning the VMap
+ // to update any entries in it that would require that. This would be
+ // really slow.
+ }
+
+ // Now that the inlined function body has been fully constructed, go through
+ // and zap unconditional fall-through branches. This happen all the time when
+ // specializing code: code specialization turns conditional branches into
+ // uncond branches, and this code folds them.
+ Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+ while (I != NewFunc->end()) {
+ BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
+ if (!BI || BI->isConditional()) { ++I; continue; }
+
+ // Note that we can't eliminate uncond branches if the destination has
+ // single-entry PHI nodes. Eliminating the single-entry phi nodes would
+ // require scanning the VMap to update any entries that point to the phi
+ // node.
+ BasicBlock *Dest = BI->getSuccessor(0);
+ if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
+ ++I; continue;
+ }
+
+ // We know all single-entry PHI nodes in the inlined function have been
+ // removed, so we just need to splice the blocks.
+ BI->eraseFromParent();
+
+ // Move all the instructions in the succ to the pred.
+ I->getInstList().splice(I->end(), Dest->getInstList());
+
+ // Make all PHI nodes that referred to Dest now refer to I as their source.
+ Dest->replaceAllUsesWith(I);
+
+ // Remove the dest block.
+ Dest->eraseFromParent();
+
+ // Do not increment I, iteratively merge all things this block branches to.
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp b/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp
new file mode 100644
index 0000000..551b630
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp
@@ -0,0 +1,151 @@
+//===- CloneLoop.cpp - Clone loop nest ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneLoop interface which makes a copy of a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
+
+
+using namespace llvm;
+
+/// CloneDominatorInfo - Clone basicblock's dominator tree and, if available,
+/// dominance info. It is expected that basic block is already cloned.
+static void CloneDominatorInfo(BasicBlock *BB,
+ ValueMap<const Value *, Value *> &VMap,
+ DominatorTree *DT,
+ DominanceFrontier *DF) {
+
+ assert (DT && "DominatorTree is not available");
+ ValueMap<const Value *, Value*>::iterator BI = VMap.find(BB);
+ assert (BI != VMap.end() && "BasicBlock clone is missing");
+ BasicBlock *NewBB = cast<BasicBlock>(BI->second);
+
+ // NewBB already got dominator info.
+ if (DT->getNode(NewBB))
+ return;
+
+ assert (DT->getNode(BB) && "BasicBlock does not have dominator info");
+ // Entry block is not expected here. Infinite loops are not to cloned.
+ assert (DT->getNode(BB)->getIDom() && "BasicBlock does not have immediate dominator");
+ BasicBlock *BBDom = DT->getNode(BB)->getIDom()->getBlock();
+
+ // NewBB's dominator is either BB's dominator or BB's dominator's clone.
+ BasicBlock *NewBBDom = BBDom;
+ ValueMap<const Value *, Value*>::iterator BBDomI = VMap.find(BBDom);
+ if (BBDomI != VMap.end()) {
+ NewBBDom = cast<BasicBlock>(BBDomI->second);
+ if (!DT->getNode(NewBBDom))
+ CloneDominatorInfo(BBDom, VMap, DT, DF);
+ }
+ DT->addNewBlock(NewBB, NewBBDom);
+
+ // Copy cloned dominance frontiner set
+ if (DF) {
+ DominanceFrontier::DomSetType NewDFSet;
+ DominanceFrontier::iterator DFI = DF->find(BB);
+ if ( DFI != DF->end()) {
+ DominanceFrontier::DomSetType S = DFI->second;
+ for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end();
+ I != E; ++I) {
+ BasicBlock *DB = *I;
+ ValueMap<const Value*, Value*>::iterator IDM = VMap.find(DB);
+ if (IDM != VMap.end())
+ NewDFSet.insert(cast<BasicBlock>(IDM->second));
+ else
+ NewDFSet.insert(DB);
+ }
+ }
+ DF->addBasicBlock(NewBB, NewDFSet);
+ }
+}
+
+/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
+/// using old blocks to new blocks mapping.
+Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
+ ValueMap<const Value *, Value *> &VMap, Pass *P) {
+
+ DominatorTree *DT = NULL;
+ DominanceFrontier *DF = NULL;
+ if (P) {
+ DT = P->getAnalysisIfAvailable<DominatorTree>();
+ DF = P->getAnalysisIfAvailable<DominanceFrontier>();
+ }
+
+ SmallVector<BasicBlock *, 16> NewBlocks;
+
+ // Populate loop nest.
+ SmallVector<Loop *, 8> LoopNest;
+ LoopNest.push_back(OrigL);
+
+
+ Loop *NewParentLoop = NULL;
+ do {
+ Loop *L = LoopNest.pop_back_val();
+ Loop *NewLoop = new Loop();
+
+ if (!NewParentLoop)
+ NewParentLoop = NewLoop;
+
+ LPM->insertLoop(NewLoop, L->getParentLoop());
+
+ // Clone Basic Blocks.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone");
+ VMap[BB] = NewBB;
+ if (P)
+ LPM->cloneBasicBlockSimpleAnalysis(BB, NewBB, L);
+ NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ NewBlocks.push_back(NewBB);
+ }
+
+ // Clone dominator info.
+ if (DT)
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ CloneDominatorInfo(BB, VMap, DT, DF);
+ }
+
+ // Process sub loops
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ LoopNest.push_back(*I);
+ } while (!LoopNest.empty());
+
+ // Remap instructions to reference operands from VMap.
+ for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(),
+ NBE = NewBlocks.end(); NBItr != NBE; ++NBItr) {
+ BasicBlock *NB = *NBItr;
+ for(BasicBlock::iterator BI = NB->begin(), BE = NB->end();
+ BI != BE; ++BI) {
+ Instruction *Insn = BI;
+ for (unsigned index = 0, num_ops = Insn->getNumOperands();
+ index != num_ops; ++index) {
+ Value *Op = Insn->getOperand(index);
+ ValueMap<const Value *, Value *>::iterator OpItr = VMap.find(Op);
+ if (OpItr != VMap.end())
+ Insn->setOperand(index, OpItr->second);
+ }
+ }
+ }
+
+ BasicBlock *Latch = OrigL->getLoopLatch();
+ Function *F = Latch->getParent();
+ F->getBasicBlockList().insert(OrigL->getHeader(),
+ NewBlocks.begin(), NewBlocks.end());
+
+
+ return NewParentLoop;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
new file mode 100644
index 0000000..b347bf5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -0,0 +1,137 @@
+//===- CloneModule.cpp - Clone an entire module ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneModule interface which makes a copy of an
+// entire module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/Constant.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+/// CloneModule - Return an exact copy of the specified module. This is not as
+/// easy as it might seem because we have to worry about making copies of global
+/// variables and functions, and making their (initializers and references,
+/// respectively) refer to the right globals.
+///
+Module *llvm::CloneModule(const Module *M) {
+ // Create the value map that maps things from the old module over to the new
+ // module.
+ ValueToValueMapTy VMap;
+ return CloneModule(M, VMap);
+}
+
+Module *llvm::CloneModule(const Module *M,
+ ValueToValueMapTy &VMap) {
+ // First off, we need to create the new module...
+ Module *New = new Module(M->getModuleIdentifier(), M->getContext());
+ New->setDataLayout(M->getDataLayout());
+ New->setTargetTriple(M->getTargetTriple());
+ New->setModuleInlineAsm(M->getModuleInlineAsm());
+
+ // Copy all of the type symbol table entries over.
+ const TypeSymbolTable &TST = M->getTypeSymbolTable();
+ for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
+ TI != TE; ++TI)
+ New->addTypeName(TI->first, TI->second);
+
+ // Copy all of the dependent libraries over.
+ for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
+ New->addLibrary(*I);
+
+ // Loop over all of the global variables, making corresponding globals in the
+ // new module. Here we add them to the VMap and to the new Module. We
+ // don't worry about attributes or initializers, they will come later.
+ //
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = new GlobalVariable(*New,
+ I->getType()->getElementType(),
+ false,
+ GlobalValue::ExternalLinkage, 0,
+ I->getName());
+ GV->setAlignment(I->getAlignment());
+ VMap[I] = GV;
+ }
+
+ // Loop over the functions in the module, making external functions as before
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ Function *NF =
+ Function::Create(cast<FunctionType>(I->getType()->getElementType()),
+ GlobalValue::ExternalLinkage, I->getName(), New);
+ NF->copyAttributesFrom(I);
+ VMap[I] = NF;
+ }
+
+ // Loop over the aliases in the module
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ VMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage,
+ I->getName(), NULL, New);
+
+ // Now that all of the things that global variable initializer can refer to
+ // have been created, loop through and copy the global variable referrers
+ // over... We also set the attributes on the global now.
+ //
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
+ if (I->hasInitializer())
+ GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
+ VMap,
+ true)));
+ GV->setLinkage(I->getLinkage());
+ GV->setThreadLocal(I->isThreadLocal());
+ GV->setConstant(I->isConstant());
+ }
+
+ // Similarly, copy over function bodies now...
+ //
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ Function *F = cast<Function>(VMap[I]);
+ if (!I->isDeclaration()) {
+ Function::arg_iterator DestI = F->arg_begin();
+ for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
+ ++J) {
+ DestI->setName(J->getName());
+ VMap[J] = DestI++;
+ }
+
+ SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns);
+ }
+
+ F->setLinkage(I->getLinkage());
+ }
+
+ // And aliases
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I) {
+ GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
+ GA->setLinkage(I->getLinkage());
+ if (const Constant* C = I->getAliasee())
+ GA->setAliasee(cast<Constant>(MapValue(C, VMap, true)));
+ }
+
+ // And named metadata....
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode &NMD = *I;
+ NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
+ NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap, true)));
+ }
+
+ return New;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
new file mode 100644
index 0000000..b51f751
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -0,0 +1,795 @@
+//===- CodeExtractor.cpp - Pull code region into a new function -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface to tear out a code region, such as an
+// individual loop or a parallel section, into a new function, replacing it with
+// a call to the new function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+// Provide a command-line option to aggregate function arguments into a struct
+// for functions produced by the code extractor. This is useful when converting
+// extracted functions to pthread-based code, as only one argument (void*) can
+// be passed in to pthread_create().
+static cl::opt<bool>
+AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
+ cl::desc("Aggregate arguments to code-extracted functions"));
+
+namespace {
+ class CodeExtractor {
+ typedef SetVector<Value*> Values;
+ SetVector<BasicBlock*> BlocksToExtract;
+ DominatorTree* DT;
+ bool AggregateArgs;
+ unsigned NumExitBlocks;
+ const Type *RetTy;
+ public:
+ CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false)
+ : DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), NumExitBlocks(~0U) {}
+
+ Function *ExtractCodeRegion(const std::vector<BasicBlock*> &code);
+
+ bool isEligible(const std::vector<BasicBlock*> &code);
+
+ private:
+ /// definedInRegion - Return true if the specified value is defined in the
+ /// extracted region.
+ bool definedInRegion(Value *V) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (BlocksToExtract.count(I->getParent()))
+ return true;
+ return false;
+ }
+
+ /// definedInCaller - Return true if the specified value is defined in the
+ /// function being code extracted, but not in the region being extracted.
+ /// These values must be passed in as live-ins to the function.
+ bool definedInCaller(Value *V) const {
+ if (isa<Argument>(V)) return true;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (!BlocksToExtract.count(I->getParent()))
+ return true;
+ return false;
+ }
+
+ void severSplitPHINodes(BasicBlock *&Header);
+ void splitReturnBlocks();
+ void findInputsOutputs(Values &inputs, Values &outputs);
+
+ Function *constructFunction(const Values &inputs,
+ const Values &outputs,
+ BasicBlock *header,
+ BasicBlock *newRootNode, BasicBlock *newHeader,
+ Function *oldFunction, Module *M);
+
+ void moveCodeToFunction(Function *newFunction);
+
+ void emitCallAndSwitchStatement(Function *newFunction,
+ BasicBlock *newHeader,
+ Values &inputs,
+ Values &outputs);
+
+ };
+}
+
+/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the
+/// region, we need to split the entry block of the region so that the PHI node
+/// is easier to deal with.
+void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
+ bool HasPredsFromRegion = false;
+ unsigned NumPredsOutsideRegion = 0;
+
+ if (Header != &Header->getParent()->getEntryBlock()) {
+ PHINode *PN = dyn_cast<PHINode>(Header->begin());
+ if (!PN) return; // No PHI nodes.
+
+ // If the header node contains any PHI nodes, check to see if there is more
+ // than one entry from outside the region. If so, we need to sever the
+ // header block into two.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (BlocksToExtract.count(PN->getIncomingBlock(i)))
+ HasPredsFromRegion = true;
+ else
+ ++NumPredsOutsideRegion;
+
+ // If there is one (or fewer) predecessor from outside the region, we don't
+ // need to do anything special.
+ if (NumPredsOutsideRegion <= 1) return;
+ }
+
+ // Otherwise, we need to split the header block into two pieces: one
+ // containing PHI nodes merging values from outside of the region, and a
+ // second that contains all of the code for the block and merges back any
+ // incoming values from inside of the region.
+ BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI();
+ BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs,
+ Header->getName()+".ce");
+
+ // We only want to code extract the second block now, and it becomes the new
+ // header of the region.
+ BasicBlock *OldPred = Header;
+ BlocksToExtract.remove(OldPred);
+ BlocksToExtract.insert(NewBB);
+ Header = NewBB;
+
+ // Okay, update dominator sets. The blocks that dominate the new one are the
+ // blocks that dominate TIBB plus the new block itself.
+ if (DT)
+ DT->splitBlock(NewBB);
+
+ // Okay, now we need to adjust the PHI nodes and any branches from within the
+ // region to go to the new header block instead of the old header block.
+ if (HasPredsFromRegion) {
+ PHINode *PN = cast<PHINode>(OldPred->begin());
+ // Loop over all of the predecessors of OldPred that are in the region,
+ // changing them to branch to NewBB instead.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+ TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator();
+ TI->replaceUsesOfWith(OldPred, NewBB);
+ }
+
+ // Okay, everthing within the region is now branching to the right block, we
+ // just have to update the PHI nodes now, inserting PHI nodes into NewBB.
+ for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
+ PHINode *PN = cast<PHINode>(AfterPHIs);
+ // Create a new PHI node in the new region, which has an incoming value
+ // from OldPred of PN.
+ PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".ce",
+ NewBB->begin());
+ NewPN->addIncoming(PN, OldPred);
+
+ // Loop over all of the incoming value in PN, moving them to NewPN if they
+ // are from the extracted region.
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+ if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+ NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
+ PN->removeIncomingValue(i);
+ --i;
+ }
+ }
+ }
+ }
+}
+
+void CodeExtractor::splitReturnBlocks() {
+ for (SetVector<BasicBlock*>::iterator I = BlocksToExtract.begin(),
+ E = BlocksToExtract.end(); I != E; ++I)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
+ BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+ if (DT) {
+ // Old dominates New. New node domiantes all other nodes dominated
+ //by Old.
+ DomTreeNode *OldNode = DT->getNode(*I);
+ SmallVector<DomTreeNode*, 8> Children;
+ for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();
+ DI != DE; ++DI)
+ Children.push_back(*DI);
+
+ DomTreeNode *NewNode = DT->addNewBlock(New, *I);
+
+ for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ DT->changeImmediateDominator(*I, NewNode);
+ }
+ }
+}
+
+// findInputsOutputs - Find inputs to, outputs from the code region.
+//
+void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) {
+ std::set<BasicBlock*> ExitBlocks;
+ for (SetVector<BasicBlock*>::const_iterator ci = BlocksToExtract.begin(),
+ ce = BlocksToExtract.end(); ci != ce; ++ci) {
+ BasicBlock *BB = *ci;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // If a used value is defined outside the region, it's an input. If an
+ // instruction is used outside the region, it's an output.
+ for (User::op_iterator O = I->op_begin(), E = I->op_end(); O != E; ++O)
+ if (definedInCaller(*O))
+ inputs.insert(*O);
+
+ // Consider uses of this instruction (outputs).
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ if (!definedInRegion(*UI)) {
+ outputs.insert(I);
+ break;
+ }
+ } // for: insts
+
+ // Keep track of the exit blocks from the region.
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (!BlocksToExtract.count(TI->getSuccessor(i)))
+ ExitBlocks.insert(TI->getSuccessor(i));
+ } // for: basic blocks
+
+ NumExitBlocks = ExitBlocks.size();
+}
+
+/// constructFunction - make a function based on inputs and outputs, as follows:
+/// f(in0, ..., inN, out0, ..., outN)
+///
+Function *CodeExtractor::constructFunction(const Values &inputs,
+ const Values &outputs,
+ BasicBlock *header,
+ BasicBlock *newRootNode,
+ BasicBlock *newHeader,
+ Function *oldFunction,
+ Module *M) {
+ DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
+ DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");
+
+ // This function returns unsigned, outputs will go back by reference.
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: RetTy = Type::getVoidTy(header->getContext()); break;
+ case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
+ default: RetTy = Type::getInt16Ty(header->getContext()); break;
+ }
+
+ std::vector<const Type*> paramTy;
+
+ // Add the types of the input values to the function's argument list
+ for (Values::const_iterator i = inputs.begin(),
+ e = inputs.end(); i != e; ++i) {
+ const Value *value = *i;
+ DEBUG(dbgs() << "value used in func: " << *value << "\n");
+ paramTy.push_back(value->getType());
+ }
+
+ // Add the types of the output values to the function's argument list.
+ for (Values::const_iterator I = outputs.begin(), E = outputs.end();
+ I != E; ++I) {
+ DEBUG(dbgs() << "instr used in func: " << **I << "\n");
+ if (AggregateArgs)
+ paramTy.push_back((*I)->getType());
+ else
+ paramTy.push_back(PointerType::getUnqual((*I)->getType()));
+ }
+
+ DEBUG(dbgs() << "Function type: " << *RetTy << " f(");
+ for (std::vector<const Type*>::iterator i = paramTy.begin(),
+ e = paramTy.end(); i != e; ++i)
+ DEBUG(dbgs() << **i << ", ");
+ DEBUG(dbgs() << ")\n");
+
+ if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ PointerType *StructPtr =
+ PointerType::getUnqual(StructType::get(M->getContext(), paramTy));
+ paramTy.clear();
+ paramTy.push_back(StructPtr);
+ }
+ const FunctionType *funcType =
+ FunctionType::get(RetTy, paramTy, false);
+
+ // Create the new function
+ Function *newFunction = Function::Create(funcType,
+ GlobalValue::InternalLinkage,
+ oldFunction->getName() + "_" +
+ header->getName(), M);
+ // If the old function is no-throw, so is the new one.
+ if (oldFunction->doesNotThrow())
+ newFunction->setDoesNotThrow(true);
+
+ newFunction->getBasicBlockList().push_back(newRootNode);
+
+ // Create an iterator to name all of the arguments we inserted.
+ Function::arg_iterator AI = newFunction->arg_begin();
+
+ // Rewrite all users of the inputs in the extracted region to use the
+ // arguments (or appropriate addressing into struct) instead.
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+ Value *RewriteVal;
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
+ TerminatorInst *TI = newFunction->begin()->getTerminator();
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(AI, Idx, Idx+2,
+ "gep_" + inputs[i]->getName(), TI);
+ RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
+ } else
+ RewriteVal = AI++;
+
+ std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end());
+ for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
+ use != useE; ++use)
+ if (Instruction* inst = dyn_cast<Instruction>(*use))
+ if (BlocksToExtract.count(inst->getParent()))
+ inst->replaceUsesOfWith(inputs[i], RewriteVal);
+ }
+
+ // Set names for input and output arguments.
+ if (!AggregateArgs) {
+ AI = newFunction->arg_begin();
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
+ AI->setName(inputs[i]->getName());
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
+ AI->setName(outputs[i]->getName()+".out");
+ }
+
+ // Rewrite branches to basic blocks outside of the loop to new dummy blocks
+ // within the new function. This must be done before we lose track of which
+ // blocks were originally in the code region.
+ std::vector<User*> Users(header->use_begin(), header->use_end());
+ for (unsigned i = 0, e = Users.size(); i != e; ++i)
+ // The BasicBlock which contains the branch is not in the region
+ // modify the branch target to a new block
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i]))
+ if (!BlocksToExtract.count(TI->getParent()) &&
+ TI->getParent()->getParent() == oldFunction)
+ TI->replaceUsesOfWith(header, newHeader);
+
+ return newFunction;
+}
+
+/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI
+/// that uses the value within the basic block, and return the predecessor
+/// block associated with that use, or return 0 if none is found.
+static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
+ for (Value::use_iterator UI = Used->use_begin(),
+ UE = Used->use_end(); UI != UE; ++UI) {
+ PHINode *P = dyn_cast<PHINode>(*UI);
+ if (P && P->getParent() == BB)
+ return P->getIncomingBlock(UI);
+ }
+
+ return 0;
+}
+
+/// emitCallAndSwitchStatement - This method sets up the caller side by adding
+/// the call instruction, splitting any PHI nodes in the header block as
+/// necessary.
+void CodeExtractor::
+emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
+ Values &inputs, Values &outputs) {
+ // Emit a call to the new function, passing in: *pointer to struct (if
+ // aggregating parameters), or plan inputs and allocated memory for outputs
+ std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
+
+ LLVMContext &Context = newFunction->getContext();
+
+ // Add inputs as params, or to be filled into the struct
+ for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i)
+ if (AggregateArgs)
+ StructValues.push_back(*i);
+ else
+ params.push_back(*i);
+
+ // Create allocas for the outputs
+ for (Values::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) {
+ if (AggregateArgs) {
+ StructValues.push_back(*i);
+ } else {
+ AllocaInst *alloca =
+ new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc",
+ codeReplacer->getParent()->begin()->begin());
+ ReloadOutputs.push_back(alloca);
+ params.push_back(alloca);
+ }
+ }
+
+ AllocaInst *Struct = 0;
+ if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ std::vector<const Type*> ArgTypes;
+ for (Values::iterator v = StructValues.begin(),
+ ve = StructValues.end(); v != ve; ++v)
+ ArgTypes.push_back((*v)->getType());
+
+ // Allocate a struct at the beginning of this function
+ Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
+ Struct =
+ new AllocaInst(StructArgTy, 0, "structArg",
+ codeReplacer->getParent()->begin()->begin());
+ params.push_back(Struct);
+
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(Struct, Idx, Idx + 2,
+ "gep_" + StructValues[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ StoreInst *SI = new StoreInst(StructValues[i], GEP);
+ codeReplacer->getInstList().push_back(SI);
+ }
+ }
+
+ // Emit the call to the function
+ CallInst *call = CallInst::Create(newFunction, params.begin(), params.end(),
+ NumExitBlocks > 1 ? "targetBlock" : "");
+ codeReplacer->getInstList().push_back(call);
+
+ Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
+ unsigned FirstOut = inputs.size();
+ if (!AggregateArgs)
+ std::advance(OutputArgBegin, inputs.size());
+
+ // Reload the outputs passed in by reference
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+ Value *Output = 0;
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+ GetElementPtrInst *GEP
+ = GetElementPtrInst::Create(Struct, Idx, Idx + 2,
+ "gep_reload_" + outputs[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ Output = GEP;
+ } else {
+ Output = ReloadOutputs[i];
+ }
+ LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+ Reloads.push_back(load);
+ codeReplacer->getInstList().push_back(load);
+ std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());
+ for (unsigned u = 0, e = Users.size(); u != e; ++u) {
+ Instruction *inst = cast<Instruction>(Users[u]);
+ if (!BlocksToExtract.count(inst->getParent()))
+ inst->replaceUsesOfWith(outputs[i], load);
+ }
+ }
+
+ // Now we can emit a switch statement using the call as a value.
+ SwitchInst *TheSwitch =
+ SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
+ codeReplacer, 0, codeReplacer);
+
+ // Since there may be multiple exits from the original region, make the new
+ // function return an unsigned, switch on that number. This loop iterates
+ // over all of the blocks in the extracted region, updating any terminator
+ // instructions in the to-be-extracted region that branch to blocks that are
+ // not in the region to be extracted.
+ std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
+
+ unsigned switchVal = 0;
+ for (SetVector<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+ e = BlocksToExtract.end(); i != e; ++i) {
+ TerminatorInst *TI = (*i)->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (!BlocksToExtract.count(TI->getSuccessor(i))) {
+ BasicBlock *OldTarget = TI->getSuccessor(i);
+ // add a new basic block which returns the appropriate value
+ BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
+ if (!NewTarget) {
+ // If we don't already have an exit stub for this non-extracted
+ // destination, create one now!
+ NewTarget = BasicBlock::Create(Context,
+ OldTarget->getName() + ".exitStub",
+ newFunction);
+ unsigned SuccNum = switchVal++;
+
+ Value *brVal = 0;
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: break; // No value needed.
+ case 2: // Conditional branch, return a bool
+ brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
+ break;
+ default:
+ brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
+ break;
+ }
+
+ ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget);
+
+ // Update the switch instruction.
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
+ SuccNum),
+ OldTarget);
+
+ // Restore values just before we exit
+ Function::arg_iterator OAI = OutputArgBegin;
+ for (unsigned out = 0, e = outputs.size(); out != e; ++out) {
+ // For an invoke, the normal destination is the only one that is
+ // dominated by the result of the invocation
+ BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent();
+
+ bool DominatesDef = true;
+
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) {
+ DefBlock = Invoke->getNormalDest();
+
+ // Make sure we are looking at the original successor block, not
+ // at a newly inserted exit block, which won't be in the dominator
+ // info.
+ for (std::map<BasicBlock*, BasicBlock*>::iterator I =
+ ExitBlockMap.begin(), E = ExitBlockMap.end(); I != E; ++I)
+ if (DefBlock == I->second) {
+ DefBlock = I->first;
+ break;
+ }
+
+ // In the extract block case, if the block we are extracting ends
+ // with an invoke instruction, make sure that we don't emit a
+ // store of the invoke value for the unwind block.
+ if (!DT && DefBlock != OldTarget)
+ DominatesDef = false;
+ }
+
+ if (DT) {
+ DominatesDef = DT->dominates(DefBlock, OldTarget);
+
+ // If the output value is used by a phi in the target block,
+ // then we need to test for dominance of the phi's predecessor
+ // instead. Unfortunately, this a little complicated since we
+ // have already rewritten uses of the value to uses of the reload.
+ BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out],
+ OldTarget);
+ if (pred && DT && DT->dominates(DefBlock, pred))
+ DominatesDef = true;
+ }
+
+ if (DominatesDef) {
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
+ FirstOut+out);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(OAI, Idx, Idx + 2,
+ "gep_" + outputs[out]->getName(),
+ NTRet);
+ new StoreInst(outputs[out], GEP, NTRet);
+ } else {
+ new StoreInst(outputs[out], OAI, NTRet);
+ }
+ }
+ // Advance output iterator even if we don't emit a store
+ if (!AggregateArgs) ++OAI;
+ }
+ }
+
+ // rewrite the original branch instruction with this new target
+ TI->setSuccessor(i, NewTarget);
+ }
+ }
+
+ // Now that we've done the deed, simplify the switch instruction.
+ const Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
+ switch (NumExitBlocks) {
+ case 0:
+ // There are no successors (the block containing the switch itself), which
+ // means that previously this was the last part of the function, and hence
+ // this should be rewritten as a `ret'
+
+ // Check if the function should return a value
+ if (OldFnRetTy->isVoidTy()) {
+ ReturnInst::Create(Context, 0, TheSwitch); // Return void
+ } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
+ // return what we have
+ ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
+ } else {
+ // Otherwise we must have code extracted an unwind or something, just
+ // return whatever we want.
+ ReturnInst::Create(Context,
+ Constant::getNullValue(OldFnRetTy), TheSwitch);
+ }
+
+ TheSwitch->eraseFromParent();
+ break;
+ case 1:
+ // Only a single destination, change the switch into an unconditional
+ // branch.
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ case 2:
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
+ call, TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ default:
+ // Otherwise, make the default destination of the switch instruction be one
+ // of the other successors.
+ TheSwitch->setOperand(0, call);
+ TheSwitch->setSuccessor(0, TheSwitch->getSuccessor(NumExitBlocks));
+ TheSwitch->removeCase(NumExitBlocks); // Remove redundant case
+ break;
+ }
+}
+
+void CodeExtractor::moveCodeToFunction(Function *newFunction) {
+ Function *oldFunc = (*BlocksToExtract.begin())->getParent();
+ Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
+ Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
+
+ for (SetVector<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+ e = BlocksToExtract.end(); i != e; ++i) {
+ // Delete the basic block from the old function, and the list of blocks
+ oldBlocks.remove(*i);
+
+ // Insert this basic block into the new function
+ newBlocks.push_back(*i);
+ }
+}
+
+/// ExtractRegion - Removes a loop from a function, replaces it with a call to
+/// new function. Returns pointer to the new function.
+///
+/// algorithm:
+///
+/// find inputs and outputs for the region
+///
+/// for inputs: add to function as args, map input instr* to arg#
+/// for outputs: add allocas for scalars,
+/// add to func as args, map output instr* to arg#
+///
+/// rewrite func to use argument #s instead of instr*
+///
+/// for each scalar output in the function: at every exit, store intermediate
+/// computed result back into memory.
+///
+Function *CodeExtractor::
+ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
+ if (!isEligible(code))
+ return 0;
+
+ // 1) Find inputs, outputs
+ // 2) Construct new function
+ // * Add allocas for defs, pass as args by reference
+ // * Pass in uses as args
+ // 3) Move code region, add call instr to func
+ //
+ BlocksToExtract.insert(code.begin(), code.end());
+
+ Values inputs, outputs;
+
+ // Assumption: this is a single-entry code region, and the header is the first
+ // block in the region.
+ BasicBlock *header = code[0];
+
+ for (unsigned i = 1, e = code.size(); i != e; ++i)
+ for (pred_iterator PI = pred_begin(code[i]), E = pred_end(code[i]);
+ PI != E; ++PI)
+ assert(BlocksToExtract.count(*PI) &&
+ "No blocks in this region may have entries from outside the region"
+ " except for the first block!");
+
+ // If we have to split PHI nodes or the entry block, do so now.
+ severSplitPHINodes(header);
+
+ // If we have any return instructions in the region, split those blocks so
+ // that the return is not in the region.
+ splitReturnBlocks();
+
+ Function *oldFunction = header->getParent();
+
+ // This takes place of the original loop
+ BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
+ "codeRepl", oldFunction,
+ header);
+
+ // The new function needs a root node because other nodes can branch to the
+ // head of the region, but the entry node of a function cannot have preds.
+ BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
+ "newFuncRoot");
+ newFuncRoot->getInstList().push_back(BranchInst::Create(header));
+
+ // Find inputs to, outputs from the code region.
+ findInputsOutputs(inputs, outputs);
+
+ // Construct new function based on inputs/outputs & add allocas for all defs.
+ Function *newFunction = constructFunction(inputs, outputs, header,
+ newFuncRoot,
+ codeReplacer, oldFunction,
+ oldFunction->getParent());
+
+ emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
+
+ moveCodeToFunction(newFunction);
+
+ // Loop over all of the PHI nodes in the header block, and change any
+ // references to the old incoming edge to be the new incoming edge.
+ for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!BlocksToExtract.count(PN->getIncomingBlock(i)))
+ PN->setIncomingBlock(i, newFuncRoot);
+ }
+
+ // Look at all successors of the codeReplacer block. If any of these blocks
+ // had PHI nodes in them, we need to update the "from" block to be the code
+ // replacer, not the original block in the extracted region.
+ std::vector<BasicBlock*> Succs(succ_begin(codeReplacer),
+ succ_end(codeReplacer));
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ std::set<BasicBlock*> ProcessedPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+ if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)
+ PN->setIncomingBlock(i, codeReplacer);
+ else {
+ // There were multiple entries in the PHI for this block, now there
+ // is only one, so remove the duplicated entries.
+ PN->removeIncomingValue(i, false);
+ --i; --e;
+ }
+ }
+ }
+
+ //cerr << "NEW FUNCTION: " << *newFunction;
+ // verifyFunction(*newFunction);
+
+ // cerr << "OLD FUNCTION: " << *oldFunction;
+ // verifyFunction(*oldFunction);
+
+ DEBUG(if (verifyFunction(*newFunction))
+ report_fatal_error("verifyFunction failed!"));
+ return newFunction;
+}
+
+bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) {
+ // Deny code region if it contains allocas or vastarts.
+ for (std::vector<BasicBlock*>::const_iterator BB = code.begin(), e=code.end();
+ BB != e; ++BB)
+ for (BasicBlock::const_iterator I = (*BB)->begin(), Ie = (*BB)->end();
+ I != Ie; ++I)
+ if (isa<AllocaInst>(*I))
+ return false;
+ else if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::vastart)
+ return false;
+ return true;
+}
+
+
+/// ExtractCodeRegion - slurp a sequence of basic blocks into a brand new
+/// function
+///
+Function* llvm::ExtractCodeRegion(DominatorTree &DT,
+ const std::vector<BasicBlock*> &code,
+ bool AggregateArgs) {
+ return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(code);
+}
+
+/// ExtractBasicBlock - slurp a natural loop into a brand new function
+///
+Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) {
+ return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(L->getBlocks());
+}
+
+/// ExtractBasicBlock - slurp a basic block into a brand new function
+///
+Function* llvm::ExtractBasicBlock(BasicBlock *BB, bool AggregateArgs) {
+ std::vector<BasicBlock*> Blocks;
+ Blocks.push_back(BB);
+ return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(Blocks);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
new file mode 100644
index 0000000..8e82a02
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -0,0 +1,146 @@
+//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provide the function DemoteRegToStack(). This function takes a
+// virtual register computed by an Instruction and replaces it with a slot in
+// the stack frame, allocated via alloca. It returns the pointer to the
+// AllocaInst inserted. After this function is called on an instruction, we are
+// guaranteed that the only user of the instruction is a store that is
+// immediately after it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include <map>
+using namespace llvm;
+
+/// DemoteRegToStack - This function takes a virtual register computed by an
+/// Instruction and replaces it with a slot in the stack frame, allocated via
+/// alloca. This allows the CFG to be changed around without fear of
+/// invalidating the SSA information for the value. It returns the pointer to
+/// the alloca inserted to create a stack slot for I.
+///
+AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
+ Instruction *AllocaPoint) {
+ if (I.use_empty()) {
+ I.eraseFromParent();
+ return 0;
+ }
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) {
+ Slot = new AllocaInst(I.getType(), 0,
+ I.getName()+".reg2mem", AllocaPoint);
+ } else {
+ Function *F = I.getParent()->getParent();
+ Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
+ F->getEntryBlock().begin());
+ }
+
+ // Change all of the users of the instruction to read from the stack slot
+ // instead.
+ while (!I.use_empty()) {
+ Instruction *U = cast<Instruction>(I.use_back());
+ if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ // If this is a PHI node, we can't insert a load of the value before the
+ // use. Instead, insert the load in the predecessor block corresponding
+ // to the incoming value.
+ //
+ // Note that if there are multiple edges from a basic block to this PHI
+ // node that we cannot multiple loads. The problem is that the resultant
+ // PHI node will have multiple values (from each load) coming in from the
+ // same block, which is illegal SSA form. For this reason, we keep track
+ // and reuse loads we insert.
+ std::map<BasicBlock*, Value*> Loads;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &I) {
+ Value *&V = Loads[PN->getIncomingBlock(i)];
+ if (V == 0) {
+ // Insert the load into the predecessor block
+ V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
+ PN->getIncomingBlock(i)->getTerminator());
+ }
+ PN->setIncomingValue(i, V);
+ }
+
+ } else {
+ // If this is a normal instruction, just insert a load.
+ Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U);
+ U->replaceUsesOfWith(&I, V);
+ }
+ }
+
+
+ // Insert stores of the computed value into the stack slot. We have to be
+ // careful is I is an invoke instruction though, because we can't insert the
+ // store AFTER the terminator instruction.
+ BasicBlock::iterator InsertPt;
+ if (!isa<TerminatorInst>(I)) {
+ InsertPt = &I;
+ ++InsertPt;
+ } else {
+ // We cannot demote invoke instructions to the stack if their normal edge
+ // is critical.
+ InvokeInst &II = cast<InvokeInst>(I);
+ assert(II.getNormalDest()->getSinglePredecessor() &&
+ "Cannot demote invoke with a critical successor!");
+ InsertPt = II.getNormalDest()->begin();
+ }
+
+ for (; isa<PHINode>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before any PHI nodes.
+ new StoreInst(&I, Slot, InsertPt);
+
+ return Slot;
+}
+
+
+/// DemotePHIToStack - This function takes a virtual register computed by a phi
+/// node and replaces it with a slot in the stack frame, allocated via alloca.
+/// The phi node is deleted and it returns the pointer to the alloca inserted.
+AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
+ if (P->use_empty()) {
+ P->eraseFromParent();
+ return 0;
+ }
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) {
+ Slot = new AllocaInst(P->getType(), 0,
+ P->getName()+".reg2mem", AllocaPoint);
+ } else {
+ Function *F = P->getParent()->getParent();
+ Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
+ F->getEntryBlock().begin());
+ }
+
+ // Iterate over each operand, insert store in each predecessor.
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
+ assert(II->getParent() != P->getIncomingBlock(i) &&
+ "Invoke edge not supported yet"); II=II;
+ }
+ new StoreInst(P->getIncomingValue(i), Slot,
+ P->getIncomingBlock(i)->getTerminator());
+ }
+
+ // Insert load in place of the phi and replace all uses.
+ Value *V = new LoadInst(Slot, P->getName()+".reload", P);
+ P->replaceAllUsesWith(V);
+
+ // Delete phi.
+ P->eraseFromParent();
+
+ return Slot;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
new file mode 100644
index 0000000..88979e86
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -0,0 +1,668 @@
+//===- InlineFunction.cpp - Code to perform function inlining -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inlining of a function into a call site, resolving
+// parameters and the return value as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Attributes.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CallSite.h"
+using namespace llvm;
+
+bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI) {
+ return InlineFunction(CallSite(CI), IFI);
+}
+bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) {
+ return InlineFunction(CallSite(II), IFI);
+}
+
+
+/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
+/// an invoke, we have to turn all of the calls that can throw into
+/// invokes. This function analyze BB to see if there are any calls, and if so,
+/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
+/// nodes in that block with the values specified in InvokeDestPHIValues.
+///
+static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
+ BasicBlock *InvokeDest,
+ const SmallVectorImpl<Value*> &InvokeDestPHIValues) {
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *I = BBI++;
+
+ // We only need to check for function calls: inlined invoke
+ // instructions require no special handling.
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (CI == 0) continue;
+
+ // If this call cannot unwind, don't convert it to an invoke.
+ if (CI->doesNotThrow())
+ continue;
+
+ // Convert this function call into an invoke instruction.
+ // First, split the basic block.
+ BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
+
+ // Next, create the new invoke instruction, inserting it at the end
+ // of the old basic block.
+ ImmutableCallSite CS(CI);
+ SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
+ InvokeInst *II =
+ InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
+ InvokeArgs.begin(), InvokeArgs.end(),
+ CI->getName(), BB->getTerminator());
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+
+ // Make sure that anything using the call now uses the invoke! This also
+ // updates the CallGraph if present, because it uses a WeakVH.
+ CI->replaceAllUsesWith(II);
+
+ // Delete the unconditional branch inserted by splitBasicBlock
+ BB->getInstList().pop_back();
+ Split->getInstList().pop_front(); // Delete the original call
+
+ // Update any PHI nodes in the exceptional block to indicate that
+ // there is now a new entry in them.
+ unsigned i = 0;
+ for (BasicBlock::iterator I = InvokeDest->begin();
+ isa<PHINode>(I); ++I, ++i)
+ cast<PHINode>(I)->addIncoming(InvokeDestPHIValues[i], BB);
+
+ // This basic block is now complete, the caller will continue scanning the
+ // next one.
+ return;
+ }
+}
+
+
+/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
+/// in the body of the inlined function into invokes and turn unwind
+/// instructions into branches to the invoke unwind dest.
+///
+/// II is the invoke instruction being inlined. FirstNewBlock is the first
+/// block of the inlined code (the last block is the end of the function),
+/// and InlineCodeInfo is information about the code that got inlined.
+static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
+ ClonedCodeInfo &InlinedCodeInfo) {
+ BasicBlock *InvokeDest = II->getUnwindDest();
+ SmallVector<Value*, 8> InvokeDestPHIValues;
+
+ // If there are PHI nodes in the unwind destination block, we need to
+ // keep track of which values came into them from this invoke, then remove
+ // the entry for this block.
+ BasicBlock *InvokeBlock = II->getParent();
+ for (BasicBlock::iterator I = InvokeDest->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ // Save the value to use for this edge.
+ InvokeDestPHIValues.push_back(PN->getIncomingValueForBlock(InvokeBlock));
+ }
+
+ Function *Caller = FirstNewBlock->getParent();
+
+ // The inlined code is currently at the end of the function, scan from the
+ // start of the inlined code to its end, checking for stuff we need to
+ // rewrite. If the code doesn't have calls or unwinds, we know there is
+ // nothing to rewrite.
+ if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) {
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ InvokeDest->removePredecessor(II->getParent());
+ return;
+ }
+
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
+ if (InlinedCodeInfo.ContainsCalls)
+ HandleCallsInBlockInlinedThroughInvoke(BB, InvokeDest,
+ InvokeDestPHIValues);
+
+ if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ // An UnwindInst requires special handling when it gets inlined into an
+ // invoke site. Once this happens, we know that the unwind would cause
+ // a control transfer to the invoke exception destination, so we can
+ // transform it into a direct branch to the exception destination.
+ BranchInst::Create(InvokeDest, UI);
+
+ // Delete the unwind instruction!
+ UI->eraseFromParent();
+
+ // Update any PHI nodes in the exceptional block to indicate that
+ // there is now a new entry in them.
+ unsigned i = 0;
+ for (BasicBlock::iterator I = InvokeDest->begin();
+ isa<PHINode>(I); ++I, ++i) {
+ PHINode *PN = cast<PHINode>(I);
+ PN->addIncoming(InvokeDestPHIValues[i], BB);
+ }
+ }
+ }
+
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ InvokeDest->removePredecessor(II->getParent());
+}
+
+/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
+/// into the caller, update the specified callgraph to reflect the changes we
+/// made. Note that it's possible that not all code was copied over, so only
+/// some edges of the callgraph may remain.
+static void UpdateCallGraphAfterInlining(CallSite CS,
+ Function::iterator FirstNewBlock,
+ ValueMap<const Value*, Value*> &VMap,
+ InlineFunctionInfo &IFI) {
+ CallGraph &CG = *IFI.CG;
+ const Function *Caller = CS.getInstruction()->getParent()->getParent();
+ const Function *Callee = CS.getCalledFunction();
+ CallGraphNode *CalleeNode = CG[Callee];
+ CallGraphNode *CallerNode = CG[Caller];
+
+ // Since we inlined some uninlined call sites in the callee into the caller,
+ // add edges from the caller to all of the callees of the callee.
+ CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();
+
+ // Consider the case where CalleeNode == CallerNode.
+ CallGraphNode::CalledFunctionsVector CallCache;
+ if (CalleeNode == CallerNode) {
+ CallCache.assign(I, E);
+ I = CallCache.begin();
+ E = CallCache.end();
+ }
+
+ for (; I != E; ++I) {
+ const Value *OrigCall = I->first;
+
+ ValueMap<const Value*, Value*>::iterator VMI = VMap.find(OrigCall);
+ // Only copy the edge if the call was inlined!
+ if (VMI == VMap.end() || VMI->second == 0)
+ continue;
+
+ // If the call was inlined, but then constant folded, there is no edge to
+ // add. Check for this case.
+ Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
+ if (NewCall == 0) continue;
+
+ // Remember that this call site got inlined for the client of
+ // InlineFunction.
+ IFI.InlinedCalls.push_back(NewCall);
+
+ // It's possible that inlining the callsite will cause it to go from an
+ // indirect to a direct call by resolving a function pointer. If this
+ // happens, set the callee of the new call site to a more precise
+ // destination. This can also happen if the call graph node of the caller
+ // was just unnecessarily imprecise.
+ if (I->second->getFunction() == 0)
+ if (Function *F = CallSite(NewCall).getCalledFunction()) {
+ // Indirect call site resolved to direct call.
+ CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
+
+ continue;
+ }
+
+ CallerNode->addCalledFunction(CallSite(NewCall), I->second);
+ }
+
+ // Update the call graph by deleting the edge from Callee to Caller. We must
+ // do this after the loop above in case Caller and Callee are the same.
+ CallerNode->removeCallEdgeFor(CS);
+}
+
+// InlineFunction - This function inlines the called function into the basic
+// block of the caller. This returns false if it is not possible to inline this
+// call. The program is still in a well defined state if this occurs though.
+//
+// Note that this only does one level of inlining. For example, if the
+// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
+// exists in the instruction stream. Similiarly this will inline a recursive
+// function by one level.
+//
+bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
+ Instruction *TheCall = CS.getInstruction();
+ LLVMContext &Context = TheCall->getContext();
+ assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
+ "Instruction not in function!");
+
+ // If IFI has any state in it, zap it before we fill it in.
+ IFI.reset();
+
+ const Function *CalledFunc = CS.getCalledFunction();
+ if (CalledFunc == 0 || // Can't inline external function or indirect
+ CalledFunc->isDeclaration() || // call, or call to a vararg function!
+ CalledFunc->getFunctionType()->isVarArg()) return false;
+
+
+ // If the call to the callee is not a tail call, we must clear the 'tail'
+ // flags on any calls that we inline.
+ bool MustClearTailCallFlags =
+ !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());
+
+ // If the call to the callee cannot throw, set the 'nounwind' flag on any
+ // calls that we inline.
+ bool MarkNoUnwind = CS.doesNotThrow();
+
+ BasicBlock *OrigBB = TheCall->getParent();
+ Function *Caller = OrigBB->getParent();
+
+ // GC poses two hazards to inlining, which only occur when the callee has GC:
+ // 1. If the caller has no GC, then the callee's GC must be propagated to the
+ // caller.
+ // 2. If the caller has a differing GC, it is invalid to inline.
+ if (CalledFunc->hasGC()) {
+ if (!Caller->hasGC())
+ Caller->setGC(CalledFunc->getGC());
+ else if (CalledFunc->getGC() != Caller->getGC())
+ return false;
+ }
+
+ // Get an iterator to the last basic block in the function, which will have
+ // the new function inlined after it.
+ //
+ Function::iterator LastBlock = &Caller->back();
+
+ // Make sure to capture all of the return instructions from the cloned
+ // function.
+ SmallVector<ReturnInst*, 8> Returns;
+ ClonedCodeInfo InlinedFunctionInfo;
+ Function::iterator FirstNewBlock;
+
+ { // Scope to destroy VMap after cloning.
+ ValueMap<const Value*, Value*> VMap;
+
+ assert(CalledFunc->arg_size() == CS.arg_size() &&
+ "No varargs calls can be inlined!");
+
+ // Calculate the vector of arguments to pass into the function cloner, which
+ // matches up the formal to the actual argument values.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ unsigned ArgNo = 0;
+ for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
+ E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
+ Value *ActualArg = *AI;
+
+ // When byval arguments actually inlined, we need to make the copy implied
+ // by them explicit. However, we don't do this if the callee is readonly
+ // or readnone, because the copy would be unneeded: the callee doesn't
+ // modify the struct.
+ if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
+ !CalledFunc->onlyReadsMemory()) {
+ const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
+ const Type *VoidPtrTy =
+ Type::getInt8PtrTy(Context);
+
+ // Create the alloca. If we have TargetData, use nice alignment.
+ unsigned Align = 1;
+ if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy);
+ Value *NewAlloca = new AllocaInst(AggTy, 0, Align,
+ I->getName(),
+ &*Caller->begin()->begin());
+ // Emit a memcpy.
+ const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
+ Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
+ Intrinsic::memcpy,
+ Tys, 3);
+ Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
+ Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);
+
+ Value *Size;
+ if (IFI.TD == 0)
+ Size = ConstantExpr::getSizeOf(AggTy);
+ else
+ Size = ConstantInt::get(Type::getInt64Ty(Context),
+ IFI.TD->getTypeStoreSize(AggTy));
+
+ // Always generate a memcpy of alignment 1 here because we don't know
+ // the alignment of the src pointer. Other optimizations can infer
+ // better alignment.
+ Value *CallArgs[] = {
+ DestCast, SrcCast, Size,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ ConstantInt::get(Type::getInt1Ty(Context), 0)
+ };
+ CallInst *TheMemCpy =
+ CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
+
+ // If we have a call graph, update it.
+ if (CallGraph *CG = IFI.CG) {
+ CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
+ CallGraphNode *CallerNode = (*CG)[Caller];
+ CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
+ }
+
+ // Uses of the argument in the function should use our new alloca
+ // instead.
+ ActualArg = NewAlloca;
+
+ // Calls that we inline may use the new alloca, so we need to clear
+ // their 'tail' flags.
+ MustClearTailCallFlags = true;
+ }
+
+ VMap[I] = ActualArg;
+ }
+
+ // We want the inliner to prune the code as it copies. We would LOVE to
+ // have no dead or constant instructions leftover after inlining occurs
+ // (which can happen, e.g., because an argument was constant), but we'll be
+ // happy with whatever the cloner can do.
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+ /*ModuleLevelChanges=*/false, Returns, ".i",
+ &InlinedFunctionInfo, IFI.TD, TheCall);
+
+ // Remember the first block that is newly cloned over.
+ FirstNewBlock = LastBlock; ++FirstNewBlock;
+
+ // Update the callgraph if requested.
+ if (IFI.CG)
+ UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+ }
+
+ // If there are any alloca instructions in the block that used to be the entry
+ // block for the callee, move them to the entry block of the caller. First
+ // calculate which instruction they should be inserted before. We insert the
+ // instructions at the end of the current alloca list.
+ //
+ {
+ BasicBlock::iterator InsertPoint = Caller->begin()->begin();
+ for (BasicBlock::iterator I = FirstNewBlock->begin(),
+ E = FirstNewBlock->end(); I != E; ) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(I++);
+ if (AI == 0) continue;
+
+ // If the alloca is now dead, remove it. This often occurs due to code
+ // specialization.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ continue;
+ }
+
+ if (!isa<Constant>(AI->getArraySize()))
+ continue;
+
+ // Keep track of the static allocas that we inline into the caller if the
+ // StaticAllocas pointer is non-null.
+ IFI.StaticAllocas.push_back(AI);
+
+ // Scan for the block of allocas that we can move over, and move them
+ // all at once.
+ while (isa<AllocaInst>(I) &&
+ isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
+ ++I;
+ }
+
+ // Transfer all of the allocas over in a block. Using splice means
+ // that the instructions aren't removed from the symbol table, then
+ // reinserted.
+ Caller->getEntryBlock().getInstList().splice(InsertPoint,
+ FirstNewBlock->getInstList(),
+ AI, I);
+ }
+ }
+
+ // If the inlined code contained dynamic alloca instructions, wrap the inlined
+ // code with llvm.stacksave/llvm.stackrestore intrinsics.
+ if (InlinedFunctionInfo.ContainsDynamicAllocas) {
+ Module *M = Caller->getParent();
+ // Get the two intrinsics we care about.
+ Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
+ Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
+
+ // If we are preserving the callgraph, add edges to the stacksave/restore
+ // functions for the calls we insert.
+ CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0;
+ if (CallGraph *CG = IFI.CG) {
+ StackSaveCGN = CG->getOrInsertFunction(StackSave);
+ StackRestoreCGN = CG->getOrInsertFunction(StackRestore);
+ CallerNode = (*CG)[Caller];
+ }
+
+ // Insert the llvm.stacksave.
+ CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack",
+ FirstNewBlock->begin());
+ if (IFI.CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN);
+
+ // Insert a call to llvm.stackrestore before any return instructions in the
+ // inlined function.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", Returns[i]);
+ if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
+ }
+
+ // Count the number of StackRestore calls we insert.
+ unsigned NumStackRestores = Returns.size();
+
+ // If we are inlining an invoke instruction, insert restores before each
+ // unwind. These unwinds will be rewritten into branches later.
+ if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB)
+ if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", UI);
+ if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
+ ++NumStackRestores;
+ }
+ }
+ }
+
+ // If we are inlining tail call instruction through a call site that isn't
+ // marked 'tail', we must remove the tail marker for any calls in the inlined
+ // code. Also, calls inlined through a 'nounwind' call site should be marked
+ // 'nounwind'.
+ if (InlinedFunctionInfo.ContainsCalls &&
+ (MustClearTailCallFlags || MarkNoUnwind)) {
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (MustClearTailCallFlags)
+ CI->setTailCall(false);
+ if (MarkNoUnwind)
+ CI->setDoesNotThrow();
+ }
+ }
+
+ // If we are inlining through a 'nounwind' call site then any inlined 'unwind'
+ // instructions are unreachable.
+ if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind)
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB) {
+ TerminatorInst *Term = BB->getTerminator();
+ if (isa<UnwindInst>(Term)) {
+ new UnreachableInst(Context, Term);
+ BB->getInstList().erase(Term);
+ }
+ }
+
+ // If we are inlining for an invoke instruction, we must make sure to rewrite
+ // any inlined 'unwind' instructions into branches to the invoke exception
+ // destination, and call instructions into invoke instructions.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+ HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
+
+ // If we cloned in _exactly one_ basic block, and if that block ends in a
+ // return instruction, we splice the body of the inlined callee directly into
+ // the calling basic block.
+ if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
+ // Move all of the instructions right before the call.
+ OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
+ FirstNewBlock->begin(), FirstNewBlock->end());
+ // Remove the cloned basic block.
+ Caller->getBasicBlockList().pop_back();
+
+ // If the call site was an invoke instruction, add a branch to the normal
+ // destination.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+ BranchInst::Create(II->getNormalDest(), TheCall);
+
+ // If the return instruction returned a value, replace uses of the call with
+ // uses of the returned value.
+ if (!TheCall->use_empty()) {
+ ReturnInst *R = Returns[0];
+ if (TheCall == R->getReturnValue())
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(R->getReturnValue());
+ }
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // Since we are now done with the return instruction, delete it also.
+ Returns[0]->eraseFromParent();
+
+ // We are now done with the inlining.
+ return true;
+ }
+
+ // Otherwise, we have the normal case, of more than one block to inline or
+ // multiple return sites.
+
+ // We want to clone the entire callee function into the hole between the
+ // "starter" and "ender" blocks. How we accomplish this depends on whether
+ // this is an invoke instruction or a call instruction.
+ BasicBlock *AfterCallBB;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+
+ // Add an unconditional branch to make this look like the CallInst case...
+ BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+
+ // Split the basic block. This guarantees that no PHI nodes will have to be
+ // updated due to new incoming edges, and make the invoke case more
+ // symmetric to the call case.
+ AfterCallBB = OrigBB->splitBasicBlock(NewBr,
+ CalledFunc->getName()+".exit");
+
+ } else { // It's a call
+ // If this is a call instruction, we need to split the basic block that
+ // the call lives in.
+ //
+ AfterCallBB = OrigBB->splitBasicBlock(TheCall,
+ CalledFunc->getName()+".exit");
+ }
+
+ // Change the branch that used to go to AfterCallBB to branch to the first
+ // basic block of the inlined function.
+ //
+ TerminatorInst *Br = OrigBB->getTerminator();
+ assert(Br && Br->getOpcode() == Instruction::Br &&
+ "splitBasicBlock broken!");
+ Br->setOperand(0, FirstNewBlock);
+
+
+ // Now that the function is correct, make it a little bit nicer. In
+ // particular, move the basic blocks inserted from the end of the function
+ // into the space made by splitting the source basic block.
+ Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
+ FirstNewBlock, Caller->end());
+
+ // Handle all of the return instructions that we just cloned in, and eliminate
+ // any users of the original call/invoke instruction.
+ const Type *RTy = CalledFunc->getReturnType();
+
+ if (Returns.size() > 1) {
+ // The PHI node should go at the front of the new basic block to merge all
+ // possible incoming values.
+ PHINode *PHI = 0;
+ if (!TheCall->use_empty()) {
+ PHI = PHINode::Create(RTy, TheCall->getName(),
+ AfterCallBB->begin());
+ // Anything that used the result of the function call should now use the
+ // PHI node as their operand.
+ TheCall->replaceAllUsesWith(PHI);
+ }
+
+ // Loop over all of the return instructions adding entries to the PHI node
+ // as appropriate.
+ if (PHI) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ assert(RI->getReturnValue()->getType() == PHI->getType() &&
+ "Ret value not consistent in function!");
+ PHI->addIncoming(RI->getReturnValue(), RI->getParent());
+ }
+
+ // Now that we inserted the PHI, check to see if it has a single value
+ // (e.g. all the entries are the same or undef). If so, remove the PHI so
+ // it doesn't block other optimizations.
+ if (Value *V = PHI->hasConstantValue()) {
+ PHI->replaceAllUsesWith(V);
+ PHI->eraseFromParent();
+ }
+ }
+
+
+ // Add a branch to the merge points and remove return instructions.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ BranchInst::Create(AfterCallBB, RI);
+ RI->eraseFromParent();
+ }
+ } else if (!Returns.empty()) {
+ // Otherwise, if there is exactly one return value, just replace anything
+ // using the return value of the call with the computed value.
+ if (!TheCall->use_empty()) {
+ if (TheCall == Returns[0]->getReturnValue())
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
+ }
+
+ // Splice the code from the return block into the block that it will return
+ // to, which contains the code that was after the call.
+ BasicBlock *ReturnBB = Returns[0]->getParent();
+ AfterCallBB->getInstList().splice(AfterCallBB->begin(),
+ ReturnBB->getInstList());
+
+ // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+ ReturnBB->replaceAllUsesWith(AfterCallBB);
+
+ // Delete the return instruction now and empty ReturnBB now.
+ Returns[0]->eraseFromParent();
+ ReturnBB->eraseFromParent();
+ } else if (!TheCall->use_empty()) {
+ // No returns, but something is using the return value of the call. Just
+ // nuke the result.
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ }
+
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // We should always be able to fold the entry block of the function into the
+ // single predecessor of the block...
+ assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
+ BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
+
+ // Splice the code entry block into calling block, right before the
+ // unconditional branch.
+ OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
+ CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
+
+ // Remove the unconditional branch.
+ OrigBB->getInstList().erase(Br);
+
+ // Now we can remove the CalleeEntry block, which is now empty.
+ Caller->getBasicBlockList().erase(CalleeEntry);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
new file mode 100644
index 0000000..5ca8299
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -0,0 +1,63 @@
+//===- InstructionNamer.cpp - Give anonymous instructions names -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a little utility pass that gives instructions names, this is mostly
+// useful when diffing the effect of an optimization because deleting an
+// unnamed instruction can change all other instruction numbering, making the
+// diff very noisy.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+using namespace llvm;
+
+namespace {
+ struct InstNamer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstNamer() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F) {
+ for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
+ AI != AE; ++AI)
+ if (!AI->hasName() && !AI->getType()->isVoidTy())
+ AI->setName("arg");
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BB->hasName())
+ BB->setName("bb");
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (!I->hasName() && !I->getType()->isVoidTy())
+ I->setName("tmp");
+ }
+ return true;
+ }
+ };
+
+ char InstNamer::ID = 0;
+ INITIALIZE_PASS(InstNamer, "instnamer",
+ "Assign names to anonymous instructions", false, false);
+}
+
+
+char &llvm::InstructionNamerID = InstNamer::ID;
+//===----------------------------------------------------------------------===//
+//
+// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
+//
+FunctionPass *llvm::createInstructionNamerPass() {
+ return new InstNamer();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
new file mode 100644
index 0000000..275b265
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -0,0 +1,266 @@
+//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops by placing phi nodes at the end of the loops for
+// all values that are live across the loop boundary. For example, it turns
+// the left into the right code:
+//
+// for (...) for (...)
+// if (c) if (c)
+// X1 = ... X1 = ...
+// else else
+// X2 = ... X2 = ...
+// X3 = phi(X1, X2) X3 = phi(X1, X2)
+// ... = X3 + 4 X4 = phi(X3)
+// ... = X4 + 4
+//
+// This is still valid LLVM; the extra phi nodes are purely redundant, and will
+// be trivially eliminated by InstCombine. The major benefit of this
+// transformation is that it makes many other loop optimizations, such as
+// LoopUnswitching, simpler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lcssa"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/PredIteratorCache.h"
+using namespace llvm;
+
+STATISTIC(NumLCSSA, "Number of live out of a loop variables");
+
+namespace {
+ struct LCSSA : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LCSSA() : LoopPass(ID) {}
+
+ // Cached analysis information for the current function.
+ DominatorTree *DT;
+ std::vector<BasicBlock*> LoopBlocks;
+ PredIteratorCache PredCache;
+ Loop *L;
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG. It maintains both of these,
+ /// as well as the CFG. It also requires dominator information.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<ScalarEvolution>();
+ }
+ private:
+ bool ProcessInstruction(Instruction *Inst,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+ /// verifyAnalysis() - Verify loop nest.
+ virtual void verifyAnalysis() const {
+ // Check the special guarantees that LCSSA makes.
+ assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!");
+ }
+
+ /// inLoop - returns true if the given block is within the current loop
+ bool inLoop(BasicBlock *B) const {
+ return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
+ }
+ };
+}
+
+char LCSSA::ID = 0;
+INITIALIZE_PASS(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false);
+
+Pass *llvm::createLCSSAPass() { return new LCSSA(); }
+char &llvm::LCSSAID = LCSSA::ID;
+
+
+/// BlockDominatesAnExit - Return true if the specified block dominates at least
+/// one of the blocks in the specified list.
+static bool BlockDominatesAnExit(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ DominatorTree *DT) {
+ DomTreeNode *DomNode = DT->getNode(BB);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i])))
+ return true;
+
+ return false;
+}
+
+
+/// runOnFunction - Process all loops in the function, inner-most out.
+bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
+ L = TheLoop;
+
+ DT = &getAnalysis<DominatorTree>();
+
+ // Get the set of exiting blocks.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ if (ExitBlocks.empty())
+ return false;
+
+ // Speed up queries by creating a sorted vector of blocks.
+ LoopBlocks.clear();
+ LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+ array_pod_sort(LoopBlocks.begin(), LoopBlocks.end());
+
+ // Look at all the instructions in the loop, checking to see if they have uses
+ // outside the loop. If so, rewrite those uses.
+ bool MadeChange = false;
+
+ for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end();
+ BBI != E; ++BBI) {
+ BasicBlock *BB = *BBI;
+
+ // For large loops, avoid use-scanning by using dominance information: In
+ // particular, if a block does not dominate any of the loop exits, then none
+ // of the values defined in the block could be used outside the loop.
+ if (!BlockDominatesAnExit(BB, ExitBlocks, DT))
+ continue;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Reject two common cases fast: instructions with no uses (like stores)
+ // and instructions with one use that is in the same block as this.
+ if (I->use_empty() ||
+ (I->hasOneUse() && I->use_back()->getParent() == BB &&
+ !isa<PHINode>(I->use_back())))
+ continue;
+
+ MadeChange |= ProcessInstruction(I, ExitBlocks);
+ }
+ }
+
+ assert(L->isLCSSAForm(*DT));
+ PredCache.clear();
+
+ return MadeChange;
+}
+
+/// isExitBlock - Return true if the specified block is in the list.
+static bool isExitBlock(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] == BB)
+ return true;
+ return false;
+}
+
+/// ProcessInstruction - Given an instruction in the loop, check to see if it
+/// has any uses that are outside the current loop. If so, insert LCSSA PHI
+/// nodes and rewrite the uses.
+bool LCSSA::ProcessInstruction(Instruction *Inst,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ SmallVector<Use*, 16> UsesToRewrite;
+
+ BasicBlock *InstBB = Inst->getParent();
+
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ BasicBlock *UserBB = cast<Instruction>(U)->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(U))
+ UserBB = PN->getIncomingBlock(UI);
+
+ if (InstBB != UserBB && !inLoop(UserBB))
+ UsesToRewrite.push_back(&UI.getUse());
+ }
+
+ // If there are no uses outside the loop, exit with no change.
+ if (UsesToRewrite.empty()) return false;
+
+ ++NumLCSSA; // We are applying the transformation
+
+ // Invoke instructions are special in that their result value is not available
+ // along their unwind edge. The code below tests to see whether DomBB dominates
+ // the value, so adjust DomBB to the normal destination block, which is
+ // effectively where the value is first usable.
+ BasicBlock *DomBB = Inst->getParent();
+ if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst))
+ DomBB = Inv->getNormalDest();
+
+ DomTreeNode *DomNode = DT->getNode(DomBB);
+
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(Inst->getType(), Inst->getName());
+
+ // Insert the LCSSA phi's into all of the exit blocks dominated by the
+ // value, and add them to the Phi's map.
+ for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
+ BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
+ BasicBlock *ExitBB = *BBI;
+ if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue;
+
+ // If we already inserted something for this BB, don't reprocess it.
+ if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
+
+ PHINode *PN = PHINode::Create(Inst->getType(), Inst->getName()+".lcssa",
+ ExitBB->begin());
+ PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB));
+
+ // Add inputs from inside the loop for this PHI.
+ for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
+ PN->addIncoming(Inst, *PI);
+
+ // If the exit block has a predecessor not within the loop, arrange for
+ // the incoming value use corresponding to that predecessor to be
+ // rewritten in terms of a different LCSSA PHI.
+ if (!inLoop(*PI))
+ UsesToRewrite.push_back(
+ &PN->getOperandUse(
+ PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
+ }
+
+ // Remember that this phi makes the value alive in this block.
+ SSAUpdate.AddAvailableValue(ExitBB, PN);
+ }
+
+ // Rewrite all uses outside the loop in terms of the new PHIs we just
+ // inserted.
+ for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
+ // If this use is in an exit block, rewrite to use the newly inserted PHI.
+ // This is required for correctness because SSAUpdate doesn't handle uses in
+ // the same block. It assumes the PHI we inserted is at the end of the
+ // block.
+ Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User))
+ UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
+
+ if (isa<PHINode>(UserBB->begin()) &&
+ isExitBlock(UserBB, ExitBlocks)) {
+ UsesToRewrite[i]->set(UserBB->begin());
+ continue;
+ }
+
+ // Otherwise, do full PHI insertion.
+ SSAUpdate.RewriteUse(*UsesToRewrite[i]);
+ }
+
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
new file mode 100644
index 0000000..52f0499
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -0,0 +1,647 @@
+//===-- Local.cpp - Functions to perform local transformations ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform various local transformations to the
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Local constant propagation.
+//
+
+// ConstantFoldTerminator - If a terminator instruction is predicated on a
+// constant value, convert it into an unconditional branch to the constant
+// destination.
+//
+bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
+ TerminatorInst *T = BB->getTerminator();
+
+ // Branch - See if we are conditional jumping on constant
+ if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+ if (BI->isUnconditional()) return false; // Can't optimize uncond branch
+ BasicBlock *Dest1 = BI->getSuccessor(0);
+ BasicBlock *Dest2 = BI->getSuccessor(1);
+
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+ // Are we branching on constant?
+ // YES. Change to unconditional branch...
+ BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
+ BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
+
+ //cerr << "Function: " << T->getParent()->getParent()
+ // << "\nRemoving branch from " << T->getParent()
+ // << "\n\nTo: " << OldDest << endl;
+
+ // Let the basic block know that we are letting go of it. Based on this,
+ // it will adjust it's PHI nodes.
+ assert(BI->getParent() && "Terminator not inserted in block!");
+ OldDest->removePredecessor(BI->getParent());
+
+ // Set the unconditional destination, and change the insn to be an
+ // unconditional branch.
+ BI->setUnconditionalDest(Destination);
+ return true;
+ }
+
+ if (Dest2 == Dest1) { // Conditional branch to same location?
+ // This branch matches something like this:
+ // br bool %cond, label %Dest, label %Dest
+ // and changes it into: br label %Dest
+
+ // Let the basic block know that we are letting go of one copy of it.
+ assert(BI->getParent() && "Terminator not inserted in block!");
+ Dest1->removePredecessor(BI->getParent());
+
+ // Change a conditional branch to unconditional.
+ BI->setUnconditionalDest(Dest1);
+ return true;
+ }
+ return false;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+ // If we are switching on a constant, we can convert the switch into a
+ // single branch instruction!
+ ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
+ BasicBlock *TheOnlyDest = SI->getSuccessor(0); // The default dest
+ BasicBlock *DefaultDest = TheOnlyDest;
+ assert(TheOnlyDest == SI->getDefaultDest() &&
+ "Default destination is not successor #0?");
+
+ // Figure out which case it goes to.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ // Found case matching a constant operand?
+ if (SI->getSuccessorValue(i) == CI) {
+ TheOnlyDest = SI->getSuccessor(i);
+ break;
+ }
+
+ // Check to see if this branch is going to the same place as the default
+ // dest. If so, eliminate it as an explicit compare.
+ if (SI->getSuccessor(i) == DefaultDest) {
+ // Remove this entry.
+ DefaultDest->removePredecessor(SI->getParent());
+ SI->removeCase(i);
+ --i; --e; // Don't skip an entry...
+ continue;
+ }
+
+ // Otherwise, check to see if the switch only branches to one destination.
+ // We do this by reseting "TheOnlyDest" to null when we find two non-equal
+ // destinations.
+ if (SI->getSuccessor(i) != TheOnlyDest) TheOnlyDest = 0;
+ }
+
+ if (CI && !TheOnlyDest) {
+ // Branching on a constant, but not any of the cases, go to the default
+ // successor.
+ TheOnlyDest = SI->getDefaultDest();
+ }
+
+ // If we found a single destination that we can fold the switch into, do so
+ // now.
+ if (TheOnlyDest) {
+ // Insert the new branch.
+ BranchInst::Create(TheOnlyDest, SI);
+ BasicBlock *BB = SI->getParent();
+
+ // Remove entries from PHI nodes which we no longer branch to...
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+ // Found case matching a constant operand?
+ BasicBlock *Succ = SI->getSuccessor(i);
+ if (Succ == TheOnlyDest)
+ TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest
+ else
+ Succ->removePredecessor(BB);
+ }
+
+ // Delete the old switch.
+ BB->getInstList().erase(SI);
+ return true;
+ }
+
+ if (SI->getNumSuccessors() == 2) {
+ // Otherwise, we can fold this switch into a conditional branch
+ // instruction if it has only one non-default destination.
+ Value *Cond = new ICmpInst(SI, ICmpInst::ICMP_EQ, SI->getCondition(),
+ SI->getSuccessorValue(1), "cond");
+ // Insert the new branch.
+ BranchInst::Create(SI->getSuccessor(1), SI->getSuccessor(0), Cond, SI);
+
+ // Delete the old switch.
+ SI->eraseFromParent();
+ return true;
+ }
+ return false;
+ }
+
+ if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(T)) {
+ // indirectbr blockaddress(@F, @BB) -> br label @BB
+ if (BlockAddress *BA =
+ dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
+ BasicBlock *TheOnlyDest = BA->getBasicBlock();
+ // Insert the new branch.
+ BranchInst::Create(TheOnlyDest, IBI);
+
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ if (IBI->getDestination(i) == TheOnlyDest)
+ TheOnlyDest = 0;
+ else
+ IBI->getDestination(i)->removePredecessor(IBI->getParent());
+ }
+ IBI->eraseFromParent();
+
+ // If we didn't find our destination in the IBI successor list, then we
+ // have undefined behavior. Replace the unconditional branch with an
+ // 'unreachable' instruction.
+ if (TheOnlyDest) {
+ BB->getTerminator()->eraseFromParent();
+ new UnreachableInst(BB->getContext(), BB);
+ }
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Local dead code elimination.
+//
+
+/// isInstructionTriviallyDead - Return true if the result produced by the
+/// instruction is not used, and the instruction has no side effects.
+///
+bool llvm::isInstructionTriviallyDead(Instruction *I) {
+ if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
+
+ // We don't want debug info removed by anything this general.
+ if (isa<DbgInfoIntrinsic>(I)) return false;
+
+ // Likewise for memory use markers.
+ if (isa<MemoryUseIntrinsic>(I)) return false;
+
+ if (!I->mayHaveSideEffects()) return true;
+
+ // Special case intrinsics that "may have side effects" but can be deleted
+ // when dead.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ // Safe to delete llvm.stacksave if dead.
+ if (II->getIntrinsicID() == Intrinsic::stacksave)
+ return true;
+ return false;
+}
+
+/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a
+/// trivially dead instruction, delete it. If that makes any of its operands
+/// trivially dead, delete them too, recursively. Return true if any
+/// instructions were deleted.
+bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || !I->use_empty() || !isInstructionTriviallyDead(I))
+ return false;
+
+ SmallVector<Instruction*, 16> DeadInsts;
+ DeadInsts.push_back(I);
+
+ do {
+ I = DeadInsts.pop_back_val();
+
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, 0);
+
+ if (!OpV->use_empty()) continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI))
+ DeadInsts.push_back(OpI);
+ }
+
+ I->eraseFromParent();
+ } while (!DeadInsts.empty());
+
+ return true;
+}
+
+/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
+/// dead PHI node, due to being a def-use chain of single-use nodes that
+/// either forms a cycle or is terminated by a trivially dead instruction,
+/// delete it. If that makes any of its operands trivially dead, delete them
+/// too, recursively. Return true if the PHI node is actually deleted.
+bool
+llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
+ // We can remove a PHI if it is on a cycle in the def-use graph
+ // where each node in the cycle has degree one, i.e. only one use,
+ // and is an instruction with no side effects.
+ if (!PN->hasOneUse())
+ return false;
+
+ bool Changed = false;
+ SmallPtrSet<PHINode *, 4> PHIs;
+ PHIs.insert(PN);
+ for (Instruction *J = cast<Instruction>(*PN->use_begin());
+ J->hasOneUse() && !J->mayHaveSideEffects();
+ J = cast<Instruction>(*J->use_begin()))
+ // If we find a PHI more than once, we're on a cycle that
+ // won't prove fruitful.
+ if (PHINode *JP = dyn_cast<PHINode>(J))
+ if (!PHIs.insert(cast<PHINode>(JP))) {
+ // Break the cycle and delete the PHI and its operands.
+ JP->replaceAllUsesWith(UndefValue::get(JP->getType()));
+ (void)RecursivelyDeleteTriviallyDeadInstructions(JP);
+ Changed = true;
+ break;
+ }
+ return Changed;
+}
+
+/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
+/// simplify any instructions in it and recursively delete dead instructions.
+///
+/// This returns true if it changed the code, note that it can delete
+/// instructions in other blocks as well in this block.
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
+ bool MadeChange = false;
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+
+ if (Value *V = SimplifyInstruction(Inst, TD)) {
+ WeakVH BIHandle(BI);
+ ReplaceAndSimplifyAllUses(Inst, V, TD);
+ MadeChange = true;
+ if (BIHandle != BI)
+ BI = BB->begin();
+ continue;
+ }
+
+ MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ }
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Graph Restructuring.
+//
+
+
+/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
+/// method is called when we're about to delete Pred as a predecessor of BB. If
+/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
+///
+/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
+/// nodes that collapse into identity values. For example, if we have:
+/// x = phi(1, 0, 0, 0)
+/// y = and x, z
+///
+/// .. and delete the predecessor corresponding to the '1', this will attempt to
+/// recursively fold the and to 0.
+void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
+ TargetData *TD) {
+ // This only adjusts blocks with PHI nodes.
+ if (!isa<PHINode>(BB->begin()))
+ return;
+
+ // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
+ // them down. This will leave us with single entry phi nodes and other phis
+ // that can be removed.
+ BB->removePredecessor(Pred, true);
+
+ WeakVH PhiIt = &BB->front();
+ while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
+ PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
+
+ Value *PNV = PN->hasConstantValue();
+ if (PNV == 0) continue;
+
+ // If we're able to simplify the phi to a single value, substitute the new
+ // value into all of its uses.
+ assert(PNV != PN && "hasConstantValue broken");
+
+ Value *OldPhiIt = PhiIt;
+ ReplaceAndSimplifyAllUses(PN, PNV, TD);
+
+ // If recursive simplification ended up deleting the next PHI node we would
+ // iterate to, then our iterator is invalid, restart scanning from the top
+ // of the block.
+ if (PhiIt != OldPhiIt) PhiIt = &BB->front();
+ }
+}
+
+
+/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
+/// predecessor is known to have one successor (DestBB!). Eliminate the edge
+/// between them, moving the instructions in the predecessor into DestBB and
+/// deleting the predecessor block.
+///
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
+ // If BB has single-entry PHI nodes, fold them.
+ while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+ Value *NewVal = PN->getIncomingValue(0);
+ // Replace self referencing PHI with undef, it must be dead.
+ if (NewVal == PN) NewVal = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NewVal);
+ PN->eraseFromParent();
+ }
+
+ BasicBlock *PredBB = DestBB->getSinglePredecessor();
+ assert(PredBB && "Block doesn't have a single predecessor!");
+
+ // Splice all the instructions from PredBB to DestBB.
+ PredBB->getTerminator()->eraseFromParent();
+ DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+
+ // Zap anything that took the address of DestBB. Not doing this will give the
+ // address an invalid value.
+ if (DestBB->hasAddressTaken()) {
+ BlockAddress *BA = BlockAddress::get(DestBB);
+ Constant *Replacement =
+ ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1);
+ BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
+ BA->getType()));
+ BA->destroyConstant();
+ }
+
+ // Anything that branched to PredBB now branches to DestBB.
+ PredBB->replaceAllUsesWith(DestBB);
+
+ if (P) {
+ ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+ if (PI) {
+ PI->replaceAllUses(PredBB, DestBB);
+ PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB));
+ }
+ }
+ // Nuke BB.
+ PredBB->eraseFromParent();
+}
+
+/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
+/// almost-empty BB ending in an unconditional branch to Succ, into succ.
+///
+/// Assumption: Succ is the single successor for BB.
+///
+static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+ assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
+
+ DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
+ << Succ->getName() << "\n");
+ // Shortcut, if there is only a single predecessor it must be BB and merging
+ // is always safe
+ if (Succ->getSinglePredecessor()) return true;
+
+ // Make a list of the predecessors of BB
+ typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
+ BlockSet BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Use that list to make another list of common predecessors of BB and Succ
+ BlockSet CommonPreds;
+ for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (BBPreds.count(P))
+ CommonPreds.insert(P);
+ }
+
+ // Shortcut, if there are no common predecessors, merging is always safe
+ if (CommonPreds.empty())
+ return true;
+
+ // Look at all the phi nodes in Succ, to see if they present a conflict when
+ // merging these blocks
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ // If the incoming value from BB is again a PHINode in
+ // BB which has the same incoming value for *PI as PN does, we can
+ // merge the phi nodes and then the blocks can still be merged
+ PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
+ if (BBPN && BBPN->getParent() == BB) {
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ if (BBPN->getIncomingValueForBlock(*PI)
+ != PN->getIncomingValueForBlock(*PI)) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with "
+ << BBPN->getName() << " with regard to common predecessor "
+ << (*PI)->getName() << "\n");
+ return false;
+ }
+ }
+ } else {
+ Value* Val = PN->getIncomingValueForBlock(BB);
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ // See if the incoming value for the common predecessor is equal to the
+ // one for BB, in which case this phi node will not prevent the merging
+ // of the block.
+ if (Val != PN->getIncomingValueForBlock(*PI)) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with regard to common "
+ << "predecessor " << (*PI)->getName() << "\n");
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
+/// unconditional branch, and contains no instructions other than PHI nodes,
+/// potential debug intrinsics and the branch. If possible, eliminate BB by
+/// rewriting all the predecessors to branch to the successor block and return
+/// true. If we can't transform, return false.
+bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+ assert(BB != &BB->getParent()->getEntryBlock() &&
+ "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
+
+ // We can't eliminate infinite loops.
+ BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
+ if (BB == Succ) return false;
+
+ // Check to see if merging these blocks would cause conflicts for any of the
+ // phi nodes in BB or Succ. If not, we can safely merge.
+ if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
+
+ // Check for cases where Succ has multiple predecessors and a PHI node in BB
+ // has uses which will not disappear when the PHI nodes are merged. It is
+ // possible to handle such cases, but difficult: it requires checking whether
+ // BB dominates Succ, which is non-trivial to calculate in the case where
+ // Succ has multiple predecessors. Also, it requires checking whether
+ // constructing the necessary self-referential PHI node doesn't intoduce any
+ // conflicts; this isn't too difficult, but the previous code for doing this
+ // was incorrect.
+ //
+ // Note that if this check finds a live use, BB dominates Succ, so BB is
+ // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+ // folding the branch isn't profitable in that case anyway.
+ if (!Succ->getSinglePredecessor()) {
+ BasicBlock::iterator BBI = BB->begin();
+ while (isa<PHINode>(*BBI)) {
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
+ if (PN->getIncomingBlock(UI) != BB)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ ++BBI;
+ }
+ }
+
+ DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
+
+ if (isa<PHINode>(Succ->begin())) {
+ // If there is more than one pred of succ, and there are PHI nodes in
+ // the successor, then we need to add incoming edges for the PHI nodes
+ //
+ const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Loop over all of the PHI nodes in the successor of BB.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ Value *OldVal = PN->removeIncomingValue(BB, false);
+ assert(OldVal && "No entry in PHI for Pred BB!");
+
+ // If this incoming value is one of the PHI nodes in BB, the new entries
+ // in the PHI node are the entries from the old PHI.
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
+ PHINode *OldValPN = cast<PHINode>(OldVal);
+ for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
+ // Note that, since we are merging phi nodes and BB and Succ might
+ // have common predecessors, we could end up with a phi node with
+ // identical incoming branches. This will be cleaned up later (and
+ // will trigger asserts if we try to clean it up now, without also
+ // simplifying the corresponding conditional branch).
+ PN->addIncoming(OldValPN->getIncomingValue(i),
+ OldValPN->getIncomingBlock(i));
+ } else {
+ // Add an incoming value for each of the new incoming values.
+ for (unsigned i = 0, e = BBPreds.size(); i != e; ++i)
+ PN->addIncoming(OldVal, BBPreds[i]);
+ }
+ }
+ }
+
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+ if (Succ->getSinglePredecessor()) {
+ // BB is the only predecessor of Succ, so Succ will end up with exactly
+ // the same predecessors BB had.
+ Succ->getInstList().splice(Succ->begin(),
+ BB->getInstList(), BB->begin());
+ } else {
+ // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
+ assert(PN->use_empty() && "There shouldn't be any uses here!");
+ PN->eraseFromParent();
+ }
+ }
+
+ // Everything that jumped to BB now goes to Succ.
+ BB->replaceAllUsesWith(Succ);
+ if (!Succ->hasName()) Succ->takeName(BB);
+ BB->eraseFromParent(); // Delete the old basic block.
+ return true;
+}
+
+/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
+/// nodes in this block. This doesn't try to be clever about PHI nodes
+/// which differ only in the order of the incoming values, but instcombine
+/// orders them so it usually won't matter.
+///
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+ bool Changed = false;
+
+ // This implementation doesn't currently consider undef operands
+ // specially. Theroetically, two phis which are identical except for
+ // one having an undef where the other doesn't could be collapsed.
+
+ // Map from PHI hash values to PHI nodes. If multiple PHIs have
+ // the same hash value, the element is the first PHI in the
+ // linked list in CollisionMap.
+ DenseMap<uintptr_t, PHINode *> HashMap;
+
+ // Maintain linked lists of PHI nodes with common hash values.
+ DenseMap<PHINode *, PHINode *> CollisionMap;
+
+ // Examine each PHI.
+ for (BasicBlock::iterator I = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I++); ) {
+ // Compute a hash value on the operands. Instcombine will likely have sorted
+ // them, which helps expose duplicates, but we have to check all the
+ // operands to be safe in case instcombine hasn't run.
+ uintptr_t Hash = 0;
+ for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
+ // This hash algorithm is quite weak as hash functions go, but it seems
+ // to do a good enough job for this particular purpose, and is very quick.
+ Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
+ Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+ }
+ // If we've never seen this hash value before, it's a unique PHI.
+ std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
+ HashMap.insert(std::make_pair(Hash, PN));
+ if (Pair.second) continue;
+ // Otherwise it's either a duplicate or a hash collision.
+ for (PHINode *OtherPN = Pair.first->second; ; ) {
+ if (OtherPN->isIdenticalTo(PN)) {
+ // A duplicate. Replace this PHI with its duplicate.
+ PN->replaceAllUsesWith(OtherPN);
+ PN->eraseFromParent();
+ Changed = true;
+ break;
+ }
+ // A non-duplicate hash collision.
+ DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
+ if (I == CollisionMap.end()) {
+ // Set this PHI to be the head of the linked list of colliding PHIs.
+ PHINode *Old = Pair.first->second;
+ Pair.first->second = PN;
+ CollisionMap[PN] = Old;
+ break;
+ }
+ // Procede to the next PHI in the list.
+ OtherPN = I->second;
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
new file mode 100644
index 0000000..b3c4801
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -0,0 +1,762 @@
+//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs several transformations to transform natural loops into a
+// simpler form, which makes subsequent analyses and transformations simpler and
+// more effective.
+//
+// Loop pre-header insertion guarantees that there is a single, non-critical
+// entry edge from outside of the loop to the loop header. This simplifies a
+// number of analyses and transformations, such as LICM.
+//
+// Loop exit-block insertion guarantees that all exit blocks from the loop
+// (blocks which are outside of the loop that have predecessors inside of the
+// loop) only have predecessors from inside of the loop (and are thus dominated
+// by the loop header). This simplifies transformations such as store-sinking
+// that are built into LICM.
+//
+// This pass also guarantees that loops will have exactly one backedge.
+//
+// Indirectbr instructions introduce several complications. If the loop
+// contains or is entered by an indirectbr instruction, it may not be possible
+// to transform the loop and make these guarantees. Client code should check
+// that these conditions are true before relying on them.
+//
+// Note that the simplifycfg pass will clean up blocks which are split out but
+// end up being unnecessary, so usage of this pass should not pessimize
+// generated code.
+//
+// This pass obviously modifies the CFG, but updates loop information and
+// dominator information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loopsimplify"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+using namespace llvm;
+
+STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
+STATISTIC(NumNested , "Number of nested loops split out");
+
+namespace {
+ struct LoopSimplify : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LoopSimplify() : LoopPass(ID) {}
+
+ // AA - If we have an alias analysis object to update, this is it, otherwise
+ // this is null.
+ AliasAnalysis *AA;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ Loop *L;
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // We need loop information to identify the loops...
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ AU.addPreserved<DominanceFrontier>();
+ AU.addPreservedID(LCSSAID);
+ }
+
+ /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
+ void verifyAnalysis() const;
+
+ private:
+ bool ProcessLoop(Loop *L, LPPassManager &LPM);
+ BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
+ BasicBlock *InsertPreheaderForLoop(Loop *L);
+ Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
+ BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
+ void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L);
+ };
+}
+
+char LoopSimplify::ID = 0;
+INITIALIZE_PASS(LoopSimplify, "loopsimplify",
+ "Canonicalize natural loops", true, false);
+
+// Publically exposed interface to pass...
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+
+/// runOnLoop - Run down all loops in the CFG (recursively, but we could do
+/// it in any convenient order) inserting preheaders...
+///
+bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
+ L = l;
+ bool Changed = false;
+ LI = &getAnalysis<LoopInfo>();
+ AA = getAnalysisIfAvailable<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+ SE = getAnalysisIfAvailable<ScalarEvolution>();
+
+ Changed |= ProcessLoop(L, LPM);
+
+ return Changed;
+}
+
+/// ProcessLoop - Walk the loop structure in depth first order, ensuring that
+/// all loops have preheaders.
+///
+bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) {
+ bool Changed = false;
+ReprocessLoop:
+
+ // Check to see that no blocks (other than the header) in this loop have
+ // predecessors that are not in the loop. This is not valid for natural
+ // loops, but can occur if the blocks are unreachable. Since they are
+ // unreachable we can just shamelessly delete those CFG edges!
+ for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+ BB != E; ++BB) {
+ if (*BB == L->getHeader()) continue;
+
+ SmallPtrSet<BasicBlock*, 4> BadPreds;
+ for (pred_iterator PI = pred_begin(*BB),
+ PE = pred_end(*BB); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (!L->contains(P))
+ BadPreds.insert(P);
+ }
+
+ // Delete each unique out-of-loop (and thus dead) predecessor.
+ for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
+ E = BadPreds.end(); I != E; ++I) {
+
+ DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor ";
+ WriteAsOperand(dbgs(), *I, false);
+ dbgs() << "\n");
+
+ // Inform each successor of each dead pred.
+ for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
+ (*SI)->removePredecessor(*I);
+ // Zap the dead pred's terminator and replace it with unreachable.
+ TerminatorInst *TI = (*I)->getTerminator();
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ (*I)->getTerminator()->eraseFromParent();
+ new UnreachableInst((*I)->getContext(), *I);
+ Changed = true;
+ }
+ }
+
+ // If there are exiting blocks with branches on undef, resolve the undef in
+ // the direction which will exit the loop. This will help simplify loop
+ // trip count computations.
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+ E = ExitingBlocks.end(); I != E; ++I)
+ if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
+ if (BI->isConditional()) {
+ if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
+
+ DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in ";
+ WriteAsOperand(dbgs(), *I, false);
+ dbgs() << "\n");
+
+ BI->setCondition(ConstantInt::get(Cond->getType(),
+ !L->contains(BI->getSuccessor(0))));
+ Changed = true;
+ }
+ }
+
+ // Does the loop already have a preheader? If so, don't insert one.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ Preheader = InsertPreheaderForLoop(L);
+ if (Preheader) {
+ ++NumInserted;
+ Changed = true;
+ }
+ }
+
+ // Next, check to make sure that all exit nodes of the loop only have
+ // predecessors that are inside of the loop. This check guarantees that the
+ // loop preheader/header will dominate the exit blocks. If the exit block has
+ // predecessors from outside of the loop, split the edge now.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
+ ExitBlocks.end());
+ for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
+ E = ExitBlockSet.end(); I != E; ++I) {
+ BasicBlock *ExitBlock = *I;
+ for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
+ PI != PE; ++PI)
+ // Must be exactly this loop: no subloops, parent loops, or non-loop preds
+ // allowed.
+ if (!L->contains(*PI)) {
+ if (RewriteLoopExitBlock(L, ExitBlock)) {
+ ++NumInserted;
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ // If the header has more than two predecessors at this point (from the
+ // preheader and from multiple backedges), we must adjust the loop.
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ if (!LoopLatch) {
+ // If this is really a nested loop, rip it out into a child loop. Don't do
+ // this for loops with a giant number of backedges, just factor them into a
+ // common backedge instead.
+ if (L->getNumBackEdges() < 8) {
+ if (SeparateNestedLoop(L, LPM)) {
+ ++NumNested;
+ // This is a big restructuring change, reprocess the whole loop.
+ Changed = true;
+ // GCC doesn't tail recursion eliminate this.
+ goto ReprocessLoop;
+ }
+ }
+
+ // If we either couldn't, or didn't want to, identify nesting of the loops,
+ // insert a new block that all backedges target, then make it jump to the
+ // loop header.
+ LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
+ if (LoopLatch) {
+ ++NumInserted;
+ Changed = true;
+ }
+ }
+
+ // Scan over the PHI nodes in the loop header. Since they now have only two
+ // incoming values (the loop is canonicalized), we may have simplified the PHI
+ // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
+ PHINode *PN;
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ (PN = dyn_cast<PHINode>(I++)); )
+ if (Value *V = PN->hasConstantValue(DT)) {
+ if (AA) AA->deleteValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ }
+
+ // If this loop has multiple exits and the exits all go to the same
+ // block, attempt to merge the exits. This helps several passes, such
+ // as LoopRotation, which do not support loops with multiple exits.
+ // SimplifyCFG also does this (and this code uses the same utility
+ // function), however this code is loop-aware, where SimplifyCFG is
+ // not. That gives it the advantage of being able to hoist
+ // loop-invariant instructions out of the way to open up more
+ // opportunities, and the disadvantage of having the responsibility
+ // to preserve dominator information.
+ bool UniqueExit = true;
+ if (!ExitBlocks.empty())
+ for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] != ExitBlocks[0]) {
+ UniqueExit = false;
+ break;
+ }
+ if (UniqueExit) {
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+ if (!ExitingBlock->getSinglePredecessor()) continue;
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!BI || !BI->isConditional()) continue;
+ CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI || CI->getParent() != ExitingBlock) continue;
+
+ // Attempt to hoist out all instructions except for the
+ // comparison and the branch.
+ bool AllInvariant = true;
+ for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
+ Instruction *Inst = I++;
+ // Skip debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+ if (Inst == CI)
+ continue;
+ if (!L->makeLoopInvariant(Inst, Changed,
+ Preheader ? Preheader->getTerminator() : 0)) {
+ AllInvariant = false;
+ break;
+ }
+ }
+ if (!AllInvariant) continue;
+
+ // The block has now been cleared of all instructions except for
+ // a comparison and a conditional branch. SimplifyCFG may be able
+ // to fold it now.
+ if (!FoldBranchToCommonDest(BI)) continue;
+
+ // Success. The block is now dead, so remove it from the loop,
+ // update the dominator tree and dominance frontier, and delete it.
+
+ DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block ";
+ WriteAsOperand(dbgs(), ExitingBlock, false);
+ dbgs() << "\n");
+
+ assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+ Changed = true;
+ LI->removeBlock(ExitingBlock);
+
+ DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
+ DomTreeNode *Node = DT->getNode(ExitingBlock);
+ const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
+ Node->getChildren();
+ while (!Children.empty()) {
+ DomTreeNode *Child = Children.front();
+ DT->changeImmediateDominator(Child, Node->getIDom());
+ if (DF) DF->changeImmediateDominator(Child->getBlock(),
+ Node->getIDom()->getBlock(),
+ DT);
+ }
+ DT->eraseNode(ExitingBlock);
+ if (DF) DF->removeBlock(ExitingBlock);
+
+ BI->getSuccessor(0)->removePredecessor(ExitingBlock);
+ BI->getSuccessor(1)->removePredecessor(ExitingBlock);
+ ExitingBlock->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
+/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
+/// preheader, this method is called to insert one. This method has two phases:
+/// preheader insertion and analysis updating.
+///
+BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
+ BasicBlock *Header = L->getHeader();
+
+ // Compute the set of predecessors of the loop that are not in the loop.
+ SmallVector<BasicBlock*, 8> OutsideBlocks;
+ for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (!L->contains(P)) { // Coming in from outside the loop?
+ // If the loop is branched to from an indirect branch, we won't
+ // be able to fully transform the loop, because it prohibits
+ // edge splitting.
+ if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+
+ // Keep track of it.
+ OutsideBlocks.push_back(P);
+ }
+ }
+
+ // Split out the loop pre-header.
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
+ ".preheader", this);
+
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header ";
+ WriteAsOperand(dbgs(), NewBB, false);
+ dbgs() << "\n");
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+
+ return NewBB;
+}
+
+/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
+/// blocks. This method is used to split exit blocks that have predecessors
+/// outside of the loop.
+BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
+ SmallVector<BasicBlock*, 8> LoopBlocks;
+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
+ BasicBlock *P = *I;
+ if (L->contains(P)) {
+ // Don't do this if the loop is exited via an indirect branch.
+ if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+
+ LoopBlocks.push_back(P);
+ }
+ }
+
+ assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
+ BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
+ LoopBlocks.size(), ".loopexit",
+ this);
+
+ DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block ";
+ WriteAsOperand(dbgs(), NewBB, false);
+ dbgs() << "\n");
+
+ return NewBB;
+}
+
+/// AddBlockAndPredsToSet - Add the specified block, and all of its
+/// predecessors, to the specified set, if it's not already in there. Stop
+/// predecessor traversal when we reach StopBlock.
+static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
+ std::set<BasicBlock*> &Blocks) {
+ std::vector<BasicBlock *> WorkList;
+ WorkList.push_back(InputBB);
+ do {
+ BasicBlock *BB = WorkList.back(); WorkList.pop_back();
+ if (Blocks.insert(BB).second && BB != StopBlock)
+ // If BB is not already processed and it is not a stop block then
+ // insert its predecessor in the work list
+ for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ BasicBlock *WBB = *I;
+ WorkList.push_back(WBB);
+ }
+ } while(!WorkList.empty());
+}
+
+/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a
+/// PHI node that tells us how to partition the loops.
+static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
+ AliasAnalysis *AA) {
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I);
+ ++I;
+ if (Value *V = PN->hasConstantValue(DT)) {
+ // This is a degenerate PHI already, don't modify it!
+ PN->replaceAllUsesWith(V);
+ if (AA) AA->deleteValue(PN);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ // Scan this PHI node looking for a use of the PHI node by itself.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == PN &&
+ L->contains(PN->getIncomingBlock(i)))
+ // We found something tasty to remove.
+ return PN;
+ }
+ return 0;
+}
+
+// PlaceSplitBlockCarefully - If the block isn't already, move the new block to
+// right after some 'outside block' block. This prevents the preheader from
+// being placed inside the loop body, e.g. when the loop hasn't been rotated.
+void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L) {
+ // Check to see if NewBB is already well placed.
+ Function::iterator BBI = NewBB; --BBI;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ if (&*BBI == SplitPreds[i])
+ return;
+ }
+
+ // If it isn't already after an outside block, move it after one. This is
+ // always good as it makes the uncond branch from the outside block into a
+ // fall-through.
+
+ // Figure out *which* outside block to put this after. Prefer an outside
+ // block that neighbors a BB actually in the loop.
+ BasicBlock *FoundBB = 0;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ Function::iterator BBI = SplitPreds[i];
+ if (++BBI != NewBB->getParent()->end() &&
+ L->contains(BBI)) {
+ FoundBB = SplitPreds[i];
+ break;
+ }
+ }
+
+ // If our heuristic for a *good* bb to place this after doesn't find
+ // anything, just pick something. It's likely better than leaving it within
+ // the loop.
+ if (!FoundBB)
+ FoundBB = SplitPreds[0];
+ NewBB->moveAfter(FoundBB);
+}
+
+
+/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of
+/// them out into a nested loop. This is important for code that looks like
+/// this:
+///
+/// Loop:
+/// ...
+/// br cond, Loop, Next
+/// ...
+/// br cond2, Loop, Out
+///
+/// To identify this common case, we look at the PHI nodes in the header of the
+/// loop. PHI nodes with unchanging values on one backedge correspond to values
+/// that change in the "outer" loop, but not in the "inner" loop.
+///
+/// If we are able to separate out a loop, return the new outer loop that was
+/// created.
+///
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
+ PHINode *PN = FindPHIToPartitionLoops(L, DT, AA);
+ if (PN == 0) return 0; // No known way to partition.
+
+ // Pull out all predecessors that have varying values in the loop. This
+ // handles the case when a PHI node has multiple instances of itself as
+ // arguments.
+ SmallVector<BasicBlock*, 8> OuterLoopPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) != PN ||
+ !L->contains(PN->getIncomingBlock(i))) {
+ // We can't split indirectbr edges.
+ if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
+ return 0;
+
+ OuterLoopPreds.push_back(PN->getIncomingBlock(i));
+ }
+
+ DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
+
+ // If ScalarEvolution is around and knows anything about values in
+ // this loop, tell it to forget them, because we're about to
+ // substantially change it.
+ if (SE)
+ SE->forgetLoop(L);
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
+ OuterLoopPreds.size(),
+ ".outer", this);
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L);
+
+ // Create the new outer loop.
+ Loop *NewOuter = new Loop();
+
+ // Change the parent loop to use the outer loop as its child now.
+ if (Loop *Parent = L->getParentLoop())
+ Parent->replaceChildLoopWith(L, NewOuter);
+ else
+ LI->changeTopLevelLoop(L, NewOuter);
+
+ // L is now a subloop of our outer loop.
+ NewOuter->addChildLoop(L);
+
+ // Add the new loop to the pass manager queue.
+ LPM.insertLoopIntoQueue(NewOuter);
+
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ NewOuter->addBlockEntry(*I);
+
+ // Now reset the header in L, which had been moved by
+ // SplitBlockPredecessors for the outer loop.
+ L->moveToHeader(Header);
+
+ // Determine which blocks should stay in L and which should be moved out to
+ // the Outer loop now.
+ std::set<BasicBlock*> BlocksInL;
+ for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) {
+ BasicBlock *P = *PI;
+ if (DT->dominates(Header, P))
+ AddBlockAndPredsToSet(P, Header, BlocksInL);
+ }
+
+ // Scan all of the loop children of L, moving them to OuterLoop if they are
+ // not part of the inner loop.
+ const std::vector<Loop*> &SubLoops = L->getSubLoops();
+ for (size_t I = 0; I != SubLoops.size(); )
+ if (BlocksInL.count(SubLoops[I]->getHeader()))
+ ++I; // Loop remains in L
+ else
+ NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
+
+ // Now that we know which blocks are in L and which need to be moved to
+ // OuterLoop, move any blocks that need it.
+ for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+ BasicBlock *BB = L->getBlocks()[i];
+ if (!BlocksInL.count(BB)) {
+ // Move this block to the parent, updating the exit blocks sets
+ L->removeBlockFromLoop(BB);
+ if ((*LI)[BB] == L)
+ LI->changeLoopFor(BB, NewOuter);
+ --i;
+ }
+ }
+
+ return NewOuter;
+}
+
+
+
+/// InsertUniqueBackedgeBlock - This method is called when the specified loop
+/// has more than one backedge in it. If this occurs, revector all of these
+/// backedges to target a new basic block and have that block branch to the loop
+/// header. This ensures that loops have exactly one backedge.
+///
+BasicBlock *
+LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
+ assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
+
+ // Get information about the loop
+ BasicBlock *Header = L->getHeader();
+ Function *F = Header->getParent();
+
+ // Unique backedge insertion currently depends on having a preheader.
+ if (!Preheader)
+ return 0;
+
+ // Figure out which basic blocks contain back-edges to the loop header.
+ std::vector<BasicBlock*> BackedgeBlocks;
+ for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
+ BasicBlock *P = *I;
+
+ // Indirectbr edges cannot be split, so we must fail if we find one.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return 0;
+
+ if (P != Preheader) BackedgeBlocks.push_back(P);
+ }
+
+ // Create and insert the new backedge block...
+ BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
+ Header->getName()+".backedge", F);
+ BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
+
+ DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block ";
+ WriteAsOperand(dbgs(), BEBlock, false);
+ dbgs() << "\n");
+
+ // Move the new backedge block to right after the last backedge block.
+ Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
+ F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
+
+ // Now that the block has been inserted into the function, create PHI nodes in
+ // the backedge block which correspond to any PHI nodes in the header block.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".be",
+ BETerminator);
+ NewPN->reserveOperandSpace(BackedgeBlocks.size());
+ if (AA) AA->copyValue(PN, NewPN);
+
+ // Loop over the PHI node, moving all entries except the one for the
+ // preheader over to the new PHI node.
+ unsigned PreheaderIdx = ~0U;
+ bool HasUniqueIncomingValue = true;
+ Value *UniqueValue = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *IBB = PN->getIncomingBlock(i);
+ Value *IV = PN->getIncomingValue(i);
+ if (IBB == Preheader) {
+ PreheaderIdx = i;
+ } else {
+ NewPN->addIncoming(IV, IBB);
+ if (HasUniqueIncomingValue) {
+ if (UniqueValue == 0)
+ UniqueValue = IV;
+ else if (UniqueValue != IV)
+ HasUniqueIncomingValue = false;
+ }
+ }
+ }
+
+ // Delete all of the incoming values from the old PN except the preheader's
+ assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
+ if (PreheaderIdx != 0) {
+ PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
+ PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
+ }
+ // Nuke all entries except the zero'th.
+ for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
+ PN->removeIncomingValue(e-i, false);
+
+ // Finally, add the newly constructed PHI node as the entry for the BEBlock.
+ PN->addIncoming(NewPN, BEBlock);
+
+ // As an optimization, if all incoming values in the new PhiNode (which is a
+ // subset of the incoming values of the old PHI node) have the same value,
+ // eliminate the PHI Node.
+ if (HasUniqueIncomingValue) {
+ NewPN->replaceAllUsesWith(UniqueValue);
+ if (AA) AA->deleteValue(NewPN);
+ BEBlock->getInstList().erase(NewPN);
+ }
+ }
+
+ // Now that all of the PHI nodes have been inserted and adjusted, modify the
+ // backedge blocks to just to the BEBlock instead of the header.
+ for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
+ TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
+ for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
+ if (TI->getSuccessor(Op) == Header)
+ TI->setSuccessor(Op, BEBlock);
+ }
+
+ //===--- Update all analyses which we must preserve now -----------------===//
+
+ // Update Loop Information - we know that this block is now in the current
+ // loop and all parent loops.
+ L->addBasicBlockToLoop(BEBlock, LI->getBase());
+
+ // Update dominator information
+ DT->splitBlock(BEBlock);
+ if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>())
+ DF->splitBlock(BEBlock);
+
+ return BEBlock;
+}
+
+void LoopSimplify::verifyAnalysis() const {
+ // It used to be possible to just assert L->isLoopSimplifyForm(), however
+ // with the introduction of indirectbr, there are now cases where it's
+ // not possible to transform a loop as necessary. We can at least check
+ // that there is an indirectbr near any time there's trouble.
+
+ // Indirectbr can interfere with preheader and unique backedge insertion.
+ if (!L->getLoopPreheader() || !L->getLoopLatch()) {
+ bool HasIndBrPred = false;
+ for (pred_iterator PI = pred_begin(L->getHeader()),
+ PE = pred_end(L->getHeader()); PI != PE; ++PI)
+ if (isa<IndirectBrInst>((*PI)->getTerminator())) {
+ HasIndBrPred = true;
+ break;
+ }
+ assert(HasIndBrPred &&
+ "LoopSimplify has no excuse for missing loop header info!");
+ }
+
+ // Indirectbr can interfere with exit block canonicalization.
+ if (!L->hasDedicatedExits()) {
+ bool HasIndBrExiting = false;
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i)
+ if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
+ HasIndBrExiting = true;
+ break;
+ }
+ assert(HasIndBrExiting &&
+ "LoopSimplify has no excuse for missing exit block info!");
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
new file mode 100644
index 0000000..236bbe9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -0,0 +1,378 @@
+//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities. It does not define any
+// actual pass or policy, but provides a single function to perform loop
+// unrolling.
+//
+// It works best when loops have been canonicalized by the -indvars pass,
+// allowing it to determine the trip counts of loops easily.
+//
+// The process of unrolling can produce extraneous basic blocks linked with
+// unconditional branches. This will be corrected in the future.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+// TODO: Should these be here or in LoopUnroll?
+STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
+STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+
+/// RemapInstruction - Convert the instruction operands from referencing the
+/// current values into those specified by VMap.
+static inline void RemapInstruction(Instruction *I,
+ ValueMap<const Value *, Value*> &VMap) {
+ for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
+ Value *Op = I->getOperand(op);
+ ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+ if (It != VMap.end())
+ I->setOperand(op, It->second);
+ }
+}
+
+/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
+/// only has one predecessor, and that predecessor only has one successor.
+/// The LoopInfo Analysis that is passed will be kept consistent.
+/// Returns the new combined block.
+static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ BasicBlock *OnlyPred = BB->getSinglePredecessor();
+ if (!OnlyPred) return 0;
+
+ if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+ DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred);
+
+ // Resolve any PHI nodes at the start of the block. They are all
+ // guaranteed to have exactly one entry if they exist, unless there are
+ // multiple duplicate (but guaranteed to be equal) entries for the
+ // incoming edges. This occurs when there are multiple edges from
+ // OnlyPred to OnlySucc.
+ FoldSingleEntryPHINodes(BB);
+
+ // Delete the unconditional branch from the predecessor...
+ OnlyPred->getInstList().pop_back();
+
+ // Move all definitions in the successor to the predecessor...
+ OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
+ // Make all PHI nodes that referred to BB now refer to Pred as their
+ // source...
+ BB->replaceAllUsesWith(OnlyPred);
+
+ std::string OldName = BB->getName();
+
+ // Erase basic block from the function...
+ LI->removeBlock(BB);
+ BB->eraseFromParent();
+
+ // Inherit predecessor's name if it exists...
+ if (!OldName.empty() && !OnlyPred->hasName())
+ OnlyPred->setName(OldName);
+
+ return OnlyPred;
+}
+
+/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
+/// if unrolling was succesful, or false if the loop was unmodified. Unrolling
+/// can only fail when the loop's latch block is not terminated by a conditional
+/// branch instruction. However, if the trip count (and multiple) are not known,
+/// loop unrolling will mostly produce more code that is no faster.
+///
+/// The LoopInfo Analysis that is passed will be kept consistent.
+///
+/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
+/// removed from the LoopPassManager as well. LPM can also be NULL.
+bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
+ return false;
+ }
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock) {
+ DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
+ return false;
+ }
+
+ BasicBlock *Header = L->getHeader();
+ BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+ if (!BI || BI->isUnconditional()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ DEBUG(dbgs() <<
+ " Can't unroll; loop not terminated by a conditional branch.\n");
+ return false;
+ }
+
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
+ // Find trip count
+ unsigned TripCount = L->getSmallConstantTripCount();
+ // Find trip multiple if count is not available
+ unsigned TripMultiple = 1;
+ if (TripCount == 0)
+ TripMultiple = L->getSmallConstantTripMultiple();
+
+ if (TripCount != 0)
+ DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
+ if (TripMultiple != 1)
+ DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n");
+
+ // Effectively "DCE" unrolled iterations that are beyond the tripcount
+ // and will never be executed.
+ if (TripCount != 0 && Count > TripCount)
+ Count = TripCount;
+
+ assert(Count > 0);
+ assert(TripMultiple > 0);
+ assert(TripCount == 0 || TripCount % TripMultiple == 0);
+
+ // Are we eliminating the loop control altogether?
+ bool CompletelyUnroll = Count == TripCount;
+
+ // If we know the trip count, we know the multiple...
+ unsigned BreakoutTrip = 0;
+ if (TripCount != 0) {
+ BreakoutTrip = TripCount % Count;
+ TripMultiple = 0;
+ } else {
+ // Figure out what multiple to use.
+ BreakoutTrip = TripMultiple =
+ (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
+ }
+
+ if (CompletelyUnroll) {
+ DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
+ << " with trip count " << TripCount << "!\n");
+ } else {
+ DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
+ << " by " << Count);
+ if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
+ DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
+ } else if (TripMultiple != 1) {
+ DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ }
+ DEBUG(dbgs() << "!\n");
+ }
+
+ std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
+
+ bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
+ BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
+
+ // For the first iteration of the loop, we should use the precloned values for
+ // PHI nodes. Insert associations now.
+ typedef ValueMap<const Value*, Value*> ValueToValueMapTy;
+ ValueToValueMapTy LastValueMap;
+ std::vector<PHINode*> OrigPHINode;
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ OrigPHINode.push_back(PN);
+ if (Instruction *I =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)))
+ if (L->contains(I))
+ LastValueMap[I] = I;
+ }
+
+ std::vector<BasicBlock*> Headers;
+ std::vector<BasicBlock*> Latches;
+ Headers.push_back(Header);
+ Latches.push_back(LatchBlock);
+
+ for (unsigned It = 1; It != Count; ++It) {
+ std::vector<BasicBlock*> NewBlocks;
+
+ for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
+ E = LoopBlocks.end(); BB != E; ++BB) {
+ ValueToValueMapTy VMap;
+ BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
+ Header->getParent()->getBasicBlockList().push_back(New);
+
+ // Loop over all of the PHI nodes in the block, changing them to use the
+ // incoming values from the previous block.
+ if (*BB == Header)
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+ PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
+ Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal))
+ if (It > 1 && L->contains(InValI))
+ InVal = LastValueMap[InValI];
+ VMap[OrigPHINode[i]] = InVal;
+ New->getInstList().erase(NewPHI);
+ }
+
+ // Update our running map of newest clones
+ LastValueMap[*BB] = New;
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI)
+ LastValueMap[VI->first] = VI->second;
+
+ L->addBasicBlockToLoop(New, LI->getBase());
+
+ // Add phi entries for newly created values to all exit blocks except
+ // the successor of the latch block. The successor of the exit block will
+ // be updated specially after unrolling all the way.
+ if (*BB != LatchBlock)
+ for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end();
+ UI != UE;) {
+ Instruction *UseInst = cast<Instruction>(*UI);
+ ++UI;
+ if (isa<PHINode>(UseInst) && !L->contains(UseInst)) {
+ PHINode *phi = cast<PHINode>(UseInst);
+ Value *Incoming = phi->getIncomingValueForBlock(*BB);
+ phi->addIncoming(Incoming, New);
+ }
+ }
+
+ // Keep track of new headers and latches as we create them, so that
+ // we can insert the proper branches later.
+ if (*BB == Header)
+ Headers.push_back(New);
+ if (*BB == LatchBlock) {
+ Latches.push_back(New);
+
+ // Also, clear out the new latch's back edge so that it doesn't look
+ // like a new loop, so that it's amenable to being merged with adjacent
+ // blocks later on.
+ TerminatorInst *Term = New->getTerminator();
+ assert(L->contains(Term->getSuccessor(!ContinueOnTrue)));
+ assert(Term->getSuccessor(ContinueOnTrue) == LoopExit);
+ Term->setSuccessor(!ContinueOnTrue, NULL);
+ }
+
+ NewBlocks.push_back(New);
+ }
+
+ // Remap all instructions in the most recent iteration
+ for (unsigned i = 0; i < NewBlocks.size(); ++i)
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I)
+ RemapInstruction(I, LastValueMap);
+ }
+
+ // The latch block exits the loop. If there are any PHI nodes in the
+ // successor blocks, update them to use the appropriate values computed as the
+ // last iteration of the loop.
+ if (Count != 1) {
+ SmallPtrSet<PHINode*, 8> Users;
+ for (Value::use_iterator UI = LatchBlock->use_begin(),
+ UE = LatchBlock->use_end(); UI != UE; ++UI)
+ if (PHINode *phi = dyn_cast<PHINode>(*UI))
+ Users.insert(phi);
+
+ BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
+ for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end();
+ SI != SE; ++SI) {
+ PHINode *PN = *SI;
+ Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+ // If this value was defined in the loop, take the value defined by the
+ // last iteration of the loop.
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+ if (L->contains(InValI))
+ InVal = LastValueMap[InVal];
+ }
+ PN->addIncoming(InVal, LastIterationBB);
+ }
+ }
+
+ // Now, if we're doing complete unrolling, loop over the PHI nodes in the
+ // original block, setting them to their incoming values.
+ if (CompletelyUnroll) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+ PHINode *PN = OrigPHINode[i];
+ PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
+ Header->getInstList().erase(PN);
+ }
+ }
+
+ // Now that all the basic blocks for the unrolled iterations are in place,
+ // set up the branches to connect them.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ // The original branch was replicated in each unrolled iteration.
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = Headers[j];
+ bool NeedConditional = true;
+
+ // For a complete unroll, make the last iteration end with a branch
+ // to the exit block.
+ if (CompletelyUnroll && j == 0) {
+ Dest = LoopExit;
+ NeedConditional = false;
+ }
+
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
+ NeedConditional = false;
+ }
+
+ if (NeedConditional) {
+ // Update the conditional branch's successor for the following
+ // iteration.
+ Term->setSuccessor(!ContinueOnTrue, Dest);
+ } else {
+ Term->setUnconditionalDest(Dest);
+ // Merge adjacent basic blocks, if possible.
+ if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
+ std::replace(Latches.begin(), Latches.end(), Dest, Fold);
+ std::replace(Headers.begin(), Headers.end(), Dest, Fold);
+ }
+ }
+ }
+
+ // At this point, the code is well formed. We now do a quick sweep over the
+ // inserted code, doing constant propagation and dead code elimination as we
+ // go.
+ const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
+ for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
+ BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
+ Instruction *Inst = I++;
+
+ if (isInstructionTriviallyDead(Inst))
+ (*BB)->getInstList().erase(Inst);
+ else if (Constant *C = ConstantFoldInstruction(Inst)) {
+ Inst->replaceAllUsesWith(C);
+ (*BB)->getInstList().erase(Inst);
+ }
+ }
+
+ NumCompletelyUnrolled += CompletelyUnroll;
+ ++NumUnrolled;
+ // Remove the loop from the LoopPassManager if it's completely removed.
+ if (CompletelyUnroll && LPM != NULL)
+ LPM->deleteLoopFromQueue(L);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
new file mode 100644
index 0000000..a46dd84
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -0,0 +1,608 @@
+//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which do not yet
+// support stack unwinding. This pass supports two models of exception handling
+// lowering, the 'cheap' support and the 'expensive' support.
+//
+// 'Cheap' exception handling support gives the program the ability to execute
+// any program which does not "throw an exception", by turning 'invoke'
+// instructions into calls and by turning 'unwind' instructions into calls to
+// abort(). If the program does dynamically use the unwind instruction, the
+// program will print a message then abort.
+//
+// 'Expensive' exception handling support gives the full exception handling
+// support to the program at the cost of making the 'invoke' instruction
+// really expensive. It basically inserts setjmp/longjmp calls to emulate the
+// exception handling as necessary.
+//
+// Because the 'expensive' support slows down programs a lot, and EH is only
+// used for a subset of the programs, it must be specifically enabled by an
+// option.
+//
+// Note that after this pass runs the CFG is not entirely accurate (exceptional
+// control flow edges are not correct anymore) so only very simple things should
+// be done after the lowerinvoke pass has run (like generation of native code).
+// This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't
+// support the invoke instruction yet" lowering pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowerinvoke"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include <csetjmp>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumUnwinds, "Number of unwinds replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
+ cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code"));
+
+namespace {
+ class LowerInvoke : public FunctionPass {
+ // Used for both models.
+ Constant *AbortFn;
+
+ // Used for expensive EH support.
+ const Type *JBLinkTy;
+ GlobalVariable *JBListHead;
+ Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
+ bool useExpensiveEHSupport;
+
+ // We peek in TLI to grab the target's jmp_buf size and alignment
+ const TargetLowering *TLI;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LowerInvoke(const TargetLowering *tli = NULL,
+ bool useExpensiveEHSupport = ExpensiveEHSupport)
+ : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
+ TLI(tli) { }
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerSwitchID);
+ }
+
+ private:
+ bool insertCheapEHSupport(Function &F);
+ void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes);
+ void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+ AllocaInst *InvokeNum, AllocaInst *StackPtr,
+ SwitchInst *CatchSwitch);
+ bool insertExpensiveEHSupport(Function &F);
+ };
+}
+
+char LowerInvoke::ID = 0;
+INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
+ "Lower invoke and unwind, for unwindless code generators",
+ false, false);
+
+char &llvm::LowerInvokePassID = LowerInvoke::ID;
+
+// Public Interface To the LowerInvoke pass.
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
+ return new LowerInvoke(TLI, ExpensiveEHSupport);
+}
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
+ bool useExpensiveEHSupport) {
+ return new LowerInvoke(TLI, useExpensiveEHSupport);
+}
+
+// doInitialization - Make sure that there is a prototype for abort in the
+// current module.
+bool LowerInvoke::doInitialization(Module &M) {
+ const Type *VoidPtrTy =
+ Type::getInt8PtrTy(M.getContext());
+ if (useExpensiveEHSupport) {
+ // Insert a type for the linked list of jump buffers.
+ unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
+ JBSize = JBSize ? JBSize : 200;
+ const Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
+
+ { // The type is recursive, so use a type holder.
+ std::vector<const Type*> Elements;
+ Elements.push_back(JmpBufTy);
+ OpaqueType *OT = OpaqueType::get(M.getContext());
+ Elements.push_back(PointerType::getUnqual(OT));
+ PATypeHolder JBLType(StructType::get(M.getContext(), Elements));
+ OT->refineAbstractTypeTo(JBLType.get()); // Complete the cycle.
+ JBLinkTy = JBLType.get();
+ M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy);
+ }
+
+ const Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
+
+ // Now that we've done that, insert the jmpbuf list head global, unless it
+ // already exists.
+ if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) {
+ JBListHead = new GlobalVariable(M, PtrJBList, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(PtrJBList),
+ "llvm.sjljeh.jblist");
+ }
+
+// VisualStudio defines setjmp as _setjmp via #include <csetjmp> / <setjmp.h>,
+// so it looks like Intrinsic::_setjmp
+#if defined(_MSC_VER) && defined(setjmp)
+#define setjmp_undefined_for_visual_studio
+#undef setjmp
+#endif
+
+ SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_visual_studio)
+// let's return it to _setjmp state in case anyone ever needs it after this
+// point under VisualStudio
+#define setjmp _setjmp
+#endif
+
+ LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
+ StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ }
+
+ // We need the 'write' and 'abort' functions for both models.
+ AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()),
+ (Type *)0);
+ return true;
+}
+
+bool LowerInvoke::insertCheapEHSupport(Function &F) {
+ bool Changed = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
+ // Insert a normal call instruction...
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+ CallArgs.begin(), CallArgs.end(),
+ "",II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ II->replaceAllUsesWith(NewCall);
+
+ // Insert an unconditional branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Remove any PHI node entries from the exception destination.
+ II->getUnwindDest()->removePredecessor(BB);
+
+ // Remove the invoke instruction now.
+ BB->getInstList().erase(II);
+
+ ++NumInvokes; Changed = true;
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ // Insert a call to abort()
+ CallInst::Create(AbortFn, "", UI)->setTailCall();
+
+ // Insert a return instruction. This really should be a "barrier", as it
+ // is unreachable.
+ ReturnInst::Create(F.getContext(),
+ F.getReturnType()->isVoidTy() ?
+ 0 : Constant::getNullValue(F.getReturnType()), UI);
+
+ // Remove the unwind instruction now.
+ BB->getInstList().erase(UI);
+
+ ++NumUnwinds; Changed = true;
+ }
+ return Changed;
+}
+
+/// rewriteExpensiveInvoke - Insert code and hack the function to replace the
+/// specified invoke instruction with a call.
+void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+ AllocaInst *InvokeNum,
+ AllocaInst *StackPtr,
+ SwitchInst *CatchSwitch) {
+ ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ InvokeNo);
+
+ // If the unwind edge has phi nodes, split the edge.
+ if (isa<PHINode>(II->getUnwindDest()->begin())) {
+ SplitCriticalEdge(II, 1, this);
+
+ // If there are any phi nodes left, they must have a single predecessor.
+ while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ PN->eraseFromParent();
+ }
+ }
+
+ // Insert a store of the invoke num before the invoke and store zero into the
+ // location afterward.
+ new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile
+
+ // Insert a store of the stack ptr before the invoke, so we can restore it
+ // later in the exception case.
+ CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II);
+ new StoreInst(StackSaveRet, StackPtr, true, II); // volatile
+
+ BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI();
+ // nonvolatile.
+ new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())),
+ InvokeNum, false, NI);
+
+ Instruction* StackPtrLoad = new LoadInst(StackPtr, "stackptr.restore", true,
+ II->getUnwindDest()->getFirstNonPHI()
+ );
+ CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad);
+
+ // Add a switch case to our unwind block.
+ CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
+
+ // Insert a normal call instruction.
+ SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+ CallArgs.begin(), CallArgs.end(), "",
+ II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ II->replaceAllUsesWith(NewCall);
+
+ // Replace the invoke with an uncond branch.
+ BranchInst::Create(II->getNormalDest(), NewCall->getParent());
+ II->eraseFromParent();
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second) return; // already been here.
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+// First thing we need to do is scan the whole function for values that are
+// live across unwind edges. Each value that is live across an unwind edge
+// we spill into a stack location, guaranteeing that there is nothing live
+// across the unwind edge. This process also splits all critical edges
+// coming out of invoke's.
+void LowerInvoke::
+splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
+ // First step, split all critical edges from invoke instructions.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ InvokeInst *II = Invokes[i];
+ SplitCriticalEdge(II, 0, this);
+ SplitCriticalEdge(II, 1, this);
+ assert(!isa<PHINode>(II->getNormalDest()) &&
+ !isa<PHINode>(II->getUnwindDest()) &&
+ "critical edge splitting left single entry phi nodes?");
+ }
+
+ Function *F = Invokes.back()->getParent()->getParent();
+
+ // To avoid having to handle incoming arguments specially, we lower each arg
+ // to a copy instruction in the entry block. This ensures that the argument
+ // value itself cannot be live across the entry block.
+ BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+ ++AfterAllocaInsertPt;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ const Type *Ty = AI->getType();
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+ // Set the operand of the instructions back to the AllocaInst.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+ // so src and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC = new BitCastInst(
+ AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+ AI->replaceAllUsesWith(NC);
+ // Set the operand of the cast instruction back to the AllocaInst.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However,
+ // we're replacing it here with the same value it was constructed with.
+ // We do this because the above replaceAllUsesWith() clobbered the
+ // operand, but we want this one to remain.
+ NC->setOperand(0, AI);
+ }
+ }
+
+ // Finally, scan the code looking for instructions with bad live ranges.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ Instruction *Inst = II;
+ if (Inst->use_empty()) continue;
+ if (Inst->hasOneUse() &&
+ cast<Instruction>(Inst->use_back())->getParent() == BB &&
+ !isa<PHINode>(Inst->use_back())) continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction*,16> Users;
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != BB || isa<PHINode>(User))
+ Users.push_back(User);
+ }
+
+ // Scan all of the uses and see if the live range is live across an unwind
+ // edge. If we find a use live across an invoke edge, create an alloca
+ // and spill the value.
+ std::set<InvokeInst*> InvokesWithStoreInserted;
+
+ // Find all of the blocks that this value is live in.
+ std::set<BasicBlock*> LiveBBs;
+ LiveBBs.insert(Inst->getParent());
+ while (!Users.empty()) {
+ Instruction *U = Users.back();
+ Users.pop_back();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ NeedsSpill = true;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ if (NeedsSpill) {
+ ++NumSpilled;
+ DemoteRegToStack(*Inst, true);
+ }
+ }
+}
+
+bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
+ SmallVector<ReturnInst*,16> Returns;
+ SmallVector<UnwindInst*,16> Unwinds;
+ SmallVector<InvokeInst*,16> Invokes;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ // Remember all return instructions in case we insert an invoke into this
+ // function.
+ Returns.push_back(RI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Invokes.push_back(II);
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ Unwinds.push_back(UI);
+ }
+
+ if (Unwinds.empty() && Invokes.empty()) return false;
+
+ NumInvokes += Invokes.size();
+ NumUnwinds += Unwinds.size();
+
+ // TODO: This is not an optimal way to do this. In particular, this always
+ // inserts setjmp calls into the entries of functions with invoke instructions
+ // even though there are possibly paths through the function that do not
+ // execute any invokes. In particular, for functions with early exits, e.g.
+ // the 'addMove' method in hexxagon, it would be nice to not have to do the
+ // setjmp stuff on the early exit path. This requires a bit of dataflow, but
+ // would not be too hard to do.
+
+ // If we have an invoke instruction, insert a setjmp that dominates all
+ // invokes. After the setjmp, use a cond branch that goes to the original
+ // code path on zero, and to a designated 'catch' block of nonzero.
+ Value *OldJmpBufPtr = 0;
+ if (!Invokes.empty()) {
+ // First thing we need to do is scan the whole function for values that are
+ // live across unwind edges. Each value that is live across an unwind edge
+ // we spill into a stack location, guaranteeing that there is nothing live
+ // across the unwind edge. This process also splits all critical edges
+ // coming out of invoke's.
+ splitLiveRangesLiveAcrossInvokes(Invokes);
+
+ BasicBlock *EntryBB = F.begin();
+
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an
+ // alloca because the value needs to be live across invokes.
+ unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
+ AllocaInst *JmpBuf =
+ new AllocaInst(JBLinkTy, 0, Align,
+ "jblink", F.begin()->begin());
+
+ Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) };
+ OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
+ "OldBuf",
+ EntryBB->getTerminator());
+
+ // Copy the JBListHead to the alloca.
+ Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
+ EntryBB->getTerminator());
+ new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator());
+
+ // Add the new jumpbuf to the list.
+ new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator());
+
+ // Create the catch block. The catch block is basically a big switch
+ // statement that goes to all of the invoke catch blocks.
+ BasicBlock *CatchBB =
+ BasicBlock::Create(F.getContext(), "setjmp.catch", &F);
+
+ // Create an alloca which keeps track of the stack pointer before every
+ // invoke, this allows us to properly restore the stack pointer after
+ // long jumping.
+ AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0,
+ "stackptr", EntryBB->begin());
+
+ // Create an alloca which keeps track of which invoke is currently
+ // executing. For normal calls it contains zero.
+ AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0,
+ "invokenum",EntryBB->begin());
+ new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ InvokeNum, true, EntryBB->getTerminator());
+
+ // Insert a load in the Catch block, and a switch on its value. By default,
+ // we go to a block that just does an unwind (which is the correct action
+ // for a standard call).
+ BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F);
+ Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBB));
+
+ Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);
+ SwitchInst *CatchSwitch =
+ SwitchInst::Create(CatchLoad, UnwindBB, Invokes.size(), CatchBB);
+
+ // Now that things are set up, insert the setjmp call itself.
+
+ // Split the entry block to insert the conditional branch for the setjmp.
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+ "setjmp.cont");
+
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);
+ Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
+ "TheJmpBuf",
+ EntryBB->getTerminator());
+ JmpBufPtr = new BitCastInst(JmpBufPtr,
+ Type::getInt8PtrTy(F.getContext()),
+ "tmp", EntryBB->getTerminator());
+ Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret",
+ EntryBB->getTerminator());
+
+ // Compare the return value to zero.
+ Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+ ICmpInst::ICMP_EQ, SJRet,
+ Constant::getNullValue(SJRet->getType()),
+ "notunwind");
+ // Nuke the uncond branch.
+ EntryBB->getTerminator()->eraseFromParent();
+
+ // Put in a new condbranch in its place.
+ BranchInst::Create(ContBlock, CatchBB, IsNormal, EntryBB);
+
+ // At this point, we are all set up, rewrite each invoke instruction.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+ rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch);
+ }
+
+ // We know that there is at least one unwind.
+
+ // Create three new blocks, the block to load the jmpbuf ptr and compare
+ // against null, the block to do the longjmp, and the error block for if it
+ // is null. Add them at the end of the function because they are not hot.
+ BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(),
+ "dounwind", &F);
+ BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F);
+ BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F);
+
+ // If this function contains an invoke, restore the old jumpbuf ptr.
+ Value *BufPtr;
+ if (OldJmpBufPtr) {
+ // Before the return, insert a copy from the saved value to the new value.
+ BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler);
+ new StoreInst(BufPtr, JBListHead, UnwindHandler);
+ } else {
+ BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler);
+ }
+
+ // Load the JBList, if it's null, then there was no catch!
+ Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr,
+ Constant::getNullValue(BufPtr->getType()),
+ "notnull");
+ BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler);
+
+ // Create the block to do the longjmp.
+ // Get a pointer to the jmpbuf and longjmp.
+ Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) };
+ Idx[0] = GetElementPtrInst::Create(BufPtr, &Idx[0], &Idx[2], "JmpBuf",
+ UnwindBlock);
+ Idx[0] = new BitCastInst(Idx[0],
+ Type::getInt8PtrTy(F.getContext()),
+ "tmp", UnwindBlock);
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
+ CallInst::Create(LongJmpFn, &Idx[0], &Idx[2], "", UnwindBlock);
+ new UnreachableInst(F.getContext(), UnwindBlock);
+
+ // Set up the term block ("throw without a catch").
+ new UnreachableInst(F.getContext(), TermBlock);
+
+ // Insert a call to abort()
+ CallInst::Create(AbortFn, "",
+ TermBlock->getTerminator())->setTailCall();
+
+
+ // Replace all unwinds with a branch to the unwind handler.
+ for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+ BranchInst::Create(UnwindHandler, Unwinds[i]);
+ Unwinds[i]->eraseFromParent();
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, restore the old jmpbuf pointer to its input value.
+ if (OldJmpBufPtr) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *R = Returns[i];
+
+ // Before the return, insert a copy from the saved value to the new value.
+ Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R);
+ new StoreInst(OldBuf, JBListHead, true, R);
+ }
+ }
+
+ return true;
+}
+
+bool LowerInvoke::runOnFunction(Function &F) {
+ if (useExpensiveEHSupport)
+ return insertExpensiveEHSupport(F);
+ else
+ return insertCheapEHSupport(F);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
new file mode 100644
index 0000000..5530b47
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -0,0 +1,320 @@
+//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LowerSwitch transformation rewrites switch instructions with a sequence
+// of branches, which allows targets to get away with not implementing the
+// switch instruction until it is convenient.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+ /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
+ /// instructions.
+ class LowerSwitch : public FunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerSwitch() : FunctionPass(ID) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved<UnifyFunctionExitNodes>();
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerInvokePassID);
+ }
+
+ struct CaseRange {
+ Constant* Low;
+ Constant* High;
+ BasicBlock* BB;
+
+ CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) :
+ Low(low), High(high), BB(bb) { }
+ };
+
+ typedef std::vector<CaseRange> CaseVector;
+ typedef std::vector<CaseRange>::iterator CaseItr;
+ private:
+ void processSwitchInst(SwitchInst *SI);
+
+ BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val,
+ BasicBlock* OrigBlock, BasicBlock* Default);
+ BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val,
+ BasicBlock* OrigBlock, BasicBlock* Default);
+ unsigned Clusterify(CaseVector& Cases, SwitchInst *SI);
+ };
+
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator () (const LowerSwitch::CaseRange& C1,
+ const LowerSwitch::CaseRange& C2) {
+
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
+}
+
+char LowerSwitch::ID = 0;
+INITIALIZE_PASS(LowerSwitch, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false);
+
+// Publically exposed interface to pass...
+char &llvm::LowerSwitchID = LowerSwitch::ID;
+// createLowerSwitchPass - Interface to this file...
+FunctionPass *llvm::createLowerSwitchPass() {
+ return new LowerSwitch();
+}
+
+bool LowerSwitch::runOnFunction(Function &F) {
+ bool Changed = false;
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
+ Changed = true;
+ processSwitchInst(SI);
+ }
+ }
+
+ return Changed;
+}
+
+// operator<< - Used for debugging purposes.
+//
+static raw_ostream& operator<<(raw_ostream &O,
+ const LowerSwitch::CaseVector &C) ATTRIBUTE_USED;
+static raw_ostream& operator<<(raw_ostream &O,
+ const LowerSwitch::CaseVector &C) {
+ O << "[";
+
+ for (LowerSwitch::CaseVector::const_iterator B = C.begin(),
+ E = C.end(); B != E; ) {
+ O << *B->Low << " -" << *B->High;
+ if (++B != E) O << ", ";
+ }
+
+ return O << "]";
+}
+
+// switchConvert - Convert the switch statement into a binary lookup of
+// the case values. The function recursively builds this tree.
+//
+BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
+ Value* Val, BasicBlock* OrigBlock,
+ BasicBlock* Default)
+{
+ unsigned Size = End - Begin;
+
+ if (Size == 1)
+ return newLeafBlock(*Begin, Val, OrigBlock, Default);
+
+ unsigned Mid = Size / 2;
+ std::vector<CaseRange> LHS(Begin, Begin + Mid);
+ DEBUG(dbgs() << "LHS: " << LHS << "\n");
+ std::vector<CaseRange> RHS(Begin + Mid, End);
+ DEBUG(dbgs() << "RHS: " << RHS << "\n");
+
+ CaseRange& Pivot = *(Begin + Mid);
+ DEBUG(dbgs() << "Pivot ==> "
+ << cast<ConstantInt>(Pivot.Low)->getValue() << " -"
+ << cast<ConstantInt>(Pivot.High)->getValue() << "\n");
+
+ BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val,
+ OrigBlock, Default);
+ BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val,
+ OrigBlock, Default);
+
+ // Create a new node that checks if the value is < pivot. Go to the
+ // left branch if it is and right branch if not.
+ Function* F = OrigBlock->getParent();
+ BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock");
+ Function::iterator FI = OrigBlock;
+ F->getBasicBlockList().insert(++FI, NewNode);
+
+ ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
+ Val, Pivot.Low, "Pivot");
+ NewNode->getInstList().push_back(Comp);
+ BranchInst::Create(LBranch, RBranch, Comp, NewNode);
+ return NewNode;
+}
+
+// newLeafBlock - Create a new leaf block for the binary lookup tree. It
+// checks if the switch's value == the case's value. If not, then it
+// jumps to the default branch. At this point in the tree, the value
+// can't be another valid case value, so the jump to the "default" branch
+// is warranted.
+//
+BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
+ BasicBlock* OrigBlock,
+ BasicBlock* Default)
+{
+ Function* F = OrigBlock->getParent();
+ BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
+ Function::iterator FI = OrigBlock;
+ F->getBasicBlockList().insert(++FI, NewLeaf);
+
+ // Emit comparison
+ ICmpInst* Comp = NULL;
+ if (Leaf.Low == Leaf.High) {
+ // Make the seteq instruction...
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
+ Leaf.Low, "SwitchLeaf");
+ } else {
+ // Make range comparison
+ if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) {
+ // Val >= Min && Val <= Hi --> Val <= Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else if (cast<ConstantInt>(Leaf.Low)->isZero()) {
+ // Val >= 0 && Val <= Hi --> Val <=u Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else {
+ // Emit V-Lo <=u Hi-Lo
+ Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
+ Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo,
+ Val->getName()+".off",
+ NewLeaf);
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
+ "SwitchLeaf");
+ }
+ }
+
+ // Make the conditional branch...
+ BasicBlock* Succ = Leaf.BB;
+ BranchInst::Create(Succ, Default, Comp, NewLeaf);
+
+ // If there were any PHI nodes in this successor, rewrite one entry
+ // from OrigBlock to come from NewLeaf.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode* PN = cast<PHINode>(I);
+ // Remove all but one incoming entries from the cluster
+ uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() -
+ cast<ConstantInt>(Leaf.Low)->getSExtValue();
+ for (uint64_t j = 0; j < Range; ++j) {
+ PN->removeIncomingValue(OrigBlock);
+ }
+
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
+ }
+
+ return NewLeaf;
+}
+
+// Clusterify - Transform simple list of Cases into list of CaseRange's
+unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
+ unsigned numCmps = 0;
+
+ // Start with "simple" cases
+ for (unsigned i = 1; i < SI->getNumSuccessors(); ++i)
+ Cases.push_back(CaseRange(SI->getSuccessorValue(i),
+ SI->getSuccessorValue(i),
+ SI->getSuccessor(i)));
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+ // Merge case into clusters
+ if (Cases.size()>=2)
+ for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) {
+ int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
+ int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+ BasicBlock* nextBB = J->BB;
+ BasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue-currentValue==1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
+
+// processSwitchInst - Replace the specified switch instruction with a sequence
+// of chained if-then insts in a balanced binary search.
+//
+void LowerSwitch::processSwitchInst(SwitchInst *SI) {
+ BasicBlock *CurBlock = SI->getParent();
+ BasicBlock *OrigBlock = CurBlock;
+ Function *F = CurBlock->getParent();
+ Value *Val = SI->getOperand(0); // The value we are switching on...
+ BasicBlock* Default = SI->getDefaultDest();
+
+ // If there is only the default destination, don't bother with the code below.
+ if (SI->getNumOperands() == 2) {
+ BranchInst::Create(SI->getDefaultDest(), CurBlock);
+ CurBlock->getInstList().erase(SI);
+ return;
+ }
+
+ // Create a new, empty default block so that the new hierarchy of
+ // if-then statements go to this and the PHI nodes are happy.
+ BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
+ F->getBasicBlockList().insert(Default, NewDefault);
+
+ BranchInst::Create(Default, NewDefault);
+
+ // If there is an entry in any PHI nodes for the default edge, make sure
+ // to update them as well.
+ for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewDefault);
+ }
+
+ // Prepare cases vector.
+ CaseVector Cases;
+ unsigned numCmps = Clusterify(Cases, SI);
+
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << "\n");
+ DEBUG(dbgs() << "Cases: " << Cases << "\n");
+ (void)numCmps;
+
+ BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,
+ OrigBlock, NewDefault);
+
+ // Branch to our shiny new if-then stuff...
+ BranchInst::Create(SwitchBlock, OrigBlock);
+
+ // We are now done with the switch instruction, delete it.
+ CurBlock->getInstList().erase(SI);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/Makefile b/contrib/llvm/lib/Transforms/Utils/Makefile
new file mode 100644
index 0000000..d1e9336
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/Utils/Makefile -----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMTransformUtils
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
new file mode 100644
index 0000000..101645b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -0,0 +1,89 @@
+//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is a simple pass wrapper around the PromoteMemToReg function call
+// exposed by the Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mem2reg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPromoted, "Number of alloca's promoted");
+
+namespace {
+ struct PromotePass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ PromotePass() : FunctionPass(ID) {}
+
+ // runOnFunction - To run this pass, first we calculate the alloca
+ // instructions that are safe for promotion, then we promote each one.
+ //
+ virtual bool runOnFunction(Function &F);
+
+ // getAnalysisUsage - We need dominance frontiers
+ //
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+ AU.setPreservesCFG();
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved<UnifyFunctionExitNodes>();
+ AU.addPreservedID(LowerSwitchID);
+ AU.addPreservedID(LowerInvokePassID);
+ }
+ };
+} // end of anonymous namespace
+
+char PromotePass::ID = 0;
+INITIALIZE_PASS(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false);
+
+bool PromotePass::runOnFunction(Function &F) {
+ std::vector<AllocaInst*> Allocas;
+
+ BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+
+ bool Changed = false;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+ DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
+
+ while (1) {
+ Allocas.clear();
+
+ // Find allocas that are safe to promote, by looking at all instructions in
+ // the entry node
+ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
+ if (isAllocaPromotable(AI))
+ Allocas.push_back(AI);
+
+ if (Allocas.empty()) break;
+
+ PromoteMemToReg(Allocas, DT, DF);
+ NumPromoted += Allocas.size();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+// createPromoteMemoryToRegister - Provide an entry point to create this pass.
+//
+FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
+ return new PromotePass();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
new file mode 100644
index 0000000..a4e3029
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -0,0 +1,1053 @@
+//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file promotes memory references to be register references. It promotes
+// alloca instructions which only have loads and stores as uses. An alloca is
+// transformed by using dominator frontiers to place PHI nodes, then traversing
+// the function in depth-first order to rewrite loads and stores as appropriate.
+// This is just the standard SSA construction algorithm to construct "pruned"
+// SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mem2reg"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
+STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
+STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
+STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
+
+namespace llvm {
+template<>
+struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
+ typedef std::pair<BasicBlock*, unsigned> EltTy;
+ static inline EltTy getEmptyKey() {
+ return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
+ }
+ static inline EltTy getTombstoneKey() {
+ return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
+ }
+ static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
+ return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
+ }
+ static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
+ return LHS == RHS;
+ }
+};
+}
+
+/// isAllocaPromotable - Return true if this alloca is legal for promotion.
+/// This is true if there are only loads and stores to the alloca.
+///
+bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+ // FIXME: If the memory unit is of pointer or integer type, we can permit
+ // assignments to subsections of the memory unit.
+
+ // Only allow direct and non-volatile loads and stores...
+ for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) { // Loop over all of the uses of the alloca
+ const User *U = *UI;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile())
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == AI)
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ if (SI->isVolatile())
+ return false;
+ } else {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the
+/// alloca 'V', if any.
+static DbgDeclareInst *FindAllocaDbgDeclare(Value *V) {
+ if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), &V, 1))
+ for (Value::use_iterator UI = DebugNode->use_begin(),
+ E = DebugNode->use_end(); UI != E; ++UI)
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ return DDI;
+
+ return 0;
+}
+
+namespace {
+ struct AllocaInfo;
+
+ // Data package used by RenamePass()
+ class RenamePassData {
+ public:
+ typedef std::vector<Value *> ValVector;
+
+ RenamePassData() : BB(NULL), Pred(NULL), Values() {}
+ RenamePassData(BasicBlock *B, BasicBlock *P,
+ const ValVector &V) : BB(B), Pred(P), Values(V) {}
+ BasicBlock *BB;
+ BasicBlock *Pred;
+ ValVector Values;
+
+ void swap(RenamePassData &RHS) {
+ std::swap(BB, RHS.BB);
+ std::swap(Pred, RHS.Pred);
+ Values.swap(RHS.Values);
+ }
+ };
+
+ /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
+ /// load/store instructions in the block that directly load or store an alloca.
+ ///
+ /// This functionality is important because it avoids scanning large basic
+ /// blocks multiple times when promoting many allocas in the same block.
+ class LargeBlockInfo {
+ /// InstNumbers - For each instruction that we track, keep the index of the
+ /// instruction. The index starts out as the number of the instruction from
+ /// the start of the block.
+ DenseMap<const Instruction *, unsigned> InstNumbers;
+ public:
+
+ /// isInterestingInstruction - This code only looks at accesses to allocas.
+ static bool isInterestingInstruction(const Instruction *I) {
+ return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
+ (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
+ }
+
+ /// getInstructionIndex - Get or calculate the index of the specified
+ /// instruction.
+ unsigned getInstructionIndex(const Instruction *I) {
+ assert(isInterestingInstruction(I) &&
+ "Not a load/store to/from an alloca?");
+
+ // If we already have this instruction number, return it.
+ DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
+ if (It != InstNumbers.end()) return It->second;
+
+ // Scan the whole block to get the instruction. This accumulates
+ // information for every interesting instruction in the block, in order to
+ // avoid gratuitus rescans.
+ const BasicBlock *BB = I->getParent();
+ unsigned InstNo = 0;
+ for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
+ BBI != E; ++BBI)
+ if (isInterestingInstruction(BBI))
+ InstNumbers[BBI] = InstNo++;
+ It = InstNumbers.find(I);
+
+ assert(It != InstNumbers.end() && "Didn't insert instruction?");
+ return It->second;
+ }
+
+ void deleteValue(const Instruction *I) {
+ InstNumbers.erase(I);
+ }
+
+ void clear() {
+ InstNumbers.clear();
+ }
+ };
+
+ struct PromoteMem2Reg {
+ /// Allocas - The alloca instructions being promoted.
+ ///
+ std::vector<AllocaInst*> Allocas;
+ DominatorTree &DT;
+ DominanceFrontier &DF;
+ DIFactory *DIF;
+
+ /// AST - An AliasSetTracker object to update. If null, don't update it.
+ ///
+ AliasSetTracker *AST;
+
+ /// AllocaLookup - Reverse mapping of Allocas.
+ ///
+ std::map<AllocaInst*, unsigned> AllocaLookup;
+
+ /// NewPhiNodes - The PhiNodes we're adding.
+ ///
+ DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;
+
+ /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
+ /// it corresponds to.
+ DenseMap<PHINode*, unsigned> PhiToAllocaMap;
+
+ /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
+ /// each alloca that is of pointer type, we keep track of what to copyValue
+ /// to the inserted PHI nodes here.
+ ///
+ std::vector<Value*> PointerAllocaValues;
+
+ /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare
+ /// intrinsic that describes it, if any, so that we can convert it to a
+ /// dbg.value intrinsic if the alloca gets promoted.
+ SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares;
+
+ /// Visited - The set of basic blocks the renamer has already visited.
+ ///
+ SmallPtrSet<BasicBlock*, 16> Visited;
+
+ /// BBNumbers - Contains a stable numbering of basic blocks to avoid
+ /// non-determinstic behavior.
+ DenseMap<BasicBlock*, unsigned> BBNumbers;
+
+ /// BBNumPreds - Lazily compute the number of predecessors a block has.
+ DenseMap<const BasicBlock*, unsigned> BBNumPreds;
+ public:
+ PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
+ DominanceFrontier &df, AliasSetTracker *ast)
+ : Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {}
+ ~PromoteMem2Reg() {
+ delete DIF;
+ }
+
+ void run();
+
+ /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
+ ///
+ bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
+ return DT.dominates(BB1, BB2);
+ }
+
+ private:
+ void RemoveFromAllocasList(unsigned &AllocaIdx) {
+ Allocas[AllocaIdx] = Allocas.back();
+ Allocas.pop_back();
+ --AllocaIdx;
+ }
+
+ unsigned getNumPreds(const BasicBlock *BB) {
+ unsigned &NP = BBNumPreds[BB];
+ if (NP == 0)
+ NP = std::distance(pred_begin(BB), pred_end(BB))+1;
+ return NP-1;
+ }
+
+ void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+ AllocaInfo &Info);
+ void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);
+
+ void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI);
+ void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI);
+ void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI);
+
+
+ void RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncVals,
+ std::vector<RenamePassData> &Worklist);
+ bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version,
+ SmallPtrSet<PHINode*, 16> &InsertedPHINodes);
+ };
+
+ struct AllocaInfo {
+ std::vector<BasicBlock*> DefiningBlocks;
+ std::vector<BasicBlock*> UsingBlocks;
+
+ StoreInst *OnlyStore;
+ BasicBlock *OnlyBlock;
+ bool OnlyUsedInOneBlock;
+
+ Value *AllocaPointerVal;
+ DbgDeclareInst *DbgDeclare;
+
+ void clear() {
+ DefiningBlocks.clear();
+ UsingBlocks.clear();
+ OnlyStore = 0;
+ OnlyBlock = 0;
+ OnlyUsedInOneBlock = true;
+ AllocaPointerVal = 0;
+ DbgDeclare = 0;
+ }
+
+ /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
+ /// ivars.
+ void AnalyzeAlloca(AllocaInst *AI) {
+ clear();
+
+ // As we scan the uses of the alloca instruction, keep track of stores,
+ // and decide whether all of the loads and stores to the alloca are within
+ // the same basic block.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Remember the basic blocks which define new values for the alloca
+ DefiningBlocks.push_back(SI->getParent());
+ AllocaPointerVal = SI->getOperand(0);
+ OnlyStore = SI;
+ } else {
+ LoadInst *LI = cast<LoadInst>(User);
+ // Otherwise it must be a load instruction, keep track of variable
+ // reads.
+ UsingBlocks.push_back(LI->getParent());
+ AllocaPointerVal = LI;
+ }
+
+ if (OnlyUsedInOneBlock) {
+ if (OnlyBlock == 0)
+ OnlyBlock = User->getParent();
+ else if (OnlyBlock != User->getParent())
+ OnlyUsedInOneBlock = false;
+ }
+ }
+
+ DbgDeclare = FindAllocaDbgDeclare(AI);
+ }
+ };
+} // end of anonymous namespace
+
+
+void PromoteMem2Reg::run() {
+ Function &F = *DF.getRoot()->getParent();
+
+ if (AST) PointerAllocaValues.resize(Allocas.size());
+ AllocaDbgDeclares.resize(Allocas.size());
+
+ AllocaInfo Info;
+ LargeBlockInfo LBI;
+
+ for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
+ AllocaInst *AI = Allocas[AllocaNum];
+
+ assert(isAllocaPromotable(AI) &&
+ "Cannot promote non-promotable alloca!");
+ assert(AI->getParent()->getParent() == &F &&
+ "All allocas should be in the same function, which is same as DF!");
+
+ if (AI->use_empty()) {
+ // If there are no uses of the alloca, just delete it now.
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+
+ // Remove the alloca from the Allocas list, since it has been processed
+ RemoveFromAllocasList(AllocaNum);
+ ++NumDeadAlloca;
+ continue;
+ }
+
+ // Calculate the set of read and write-locations for each alloca. This is
+ // analogous to finding the 'uses' and 'definitions' of each variable.
+ Info.AnalyzeAlloca(AI);
+
+ // If there is only a single store to this value, replace any loads of
+ // it that are directly dominated by the definition with the value stored.
+ if (Info.DefiningBlocks.size() == 1) {
+ RewriteSingleStoreAlloca(AI, Info, LBI);
+
+ // Finally, after the scan, check to see if the store is all that is left.
+ if (Info.UsingBlocks.empty()) {
+ // Record debuginfo for the store and remove the declaration's debuginfo.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore);
+ DDI->eraseFromParent();
+ }
+ // Remove the (now dead) store and alloca.
+ Info.OnlyStore->eraseFromParent();
+ LBI.deleteValue(Info.OnlyStore);
+
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+
+ ++NumSingleStore;
+ continue;
+ }
+ }
+
+ // If the alloca is only read and written in one basic block, just perform a
+ // linear sweep over the block to eliminate it.
+ if (Info.OnlyUsedInOneBlock) {
+ PromoteSingleBlockAlloca(AI, Info, LBI);
+
+ // Finally, after the scan, check to see if the stores are all that is
+ // left.
+ if (Info.UsingBlocks.empty()) {
+
+ // Remove the (now dead) stores and alloca.
+ while (!AI->use_empty()) {
+ StoreInst *SI = cast<StoreInst>(AI->use_back());
+ // Record debuginfo for the store before removing it.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare)
+ ConvertDebugDeclareToDebugValue(DDI, SI);
+ SI->eraseFromParent();
+ LBI.deleteValue(SI);
+ }
+
+ if (AST) AST->deleteValue(AI);
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+
+ // The alloca's debuginfo can be removed as well.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare)
+ DDI->eraseFromParent();
+
+ ++NumLocalPromoted;
+ continue;
+ }
+ }
+
+ // If we haven't computed a numbering for the BB's in the function, do so
+ // now.
+ if (BBNumbers.empty()) {
+ unsigned ID = 0;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ BBNumbers[I] = ID++;
+ }
+
+ // If we have an AST to keep updated, remember some pointer value that is
+ // stored into the alloca.
+ if (AST)
+ PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
+
+ // Remember the dbg.declare intrinsic describing this alloca, if any.
+ if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
+
+ // Keep the reverse mapping of the 'Allocas' array for the rename pass.
+ AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
+
+ // At this point, we're committed to promoting the alloca using IDF's, and
+ // the standard SSA construction algorithm. Determine which blocks need PHI
+ // nodes and see if we can optimize out some work by avoiding insertion of
+ // dead phi nodes.
+ DetermineInsertionPoint(AI, AllocaNum, Info);
+ }
+
+ if (Allocas.empty())
+ return; // All of the allocas must have been trivial!
+
+ LBI.clear();
+
+
+ // Set the incoming values for the basic block to be null values for all of
+ // the alloca's. We do this in case there is a load of a value that has not
+ // been stored yet. In this case, it will get this null value.
+ //
+ RenamePassData::ValVector Values(Allocas.size());
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
+ Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
+
+ // Walks all basic blocks in the function performing the SSA rename algorithm
+ // and inserting the phi nodes we marked as necessary
+ //
+ std::vector<RenamePassData> RenamePassWorkList;
+ RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
+ do {
+ RenamePassData RPD;
+ RPD.swap(RenamePassWorkList.back());
+ RenamePassWorkList.pop_back();
+ // RenamePass may add new worklist entries.
+ RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
+ } while (!RenamePassWorkList.empty());
+
+ // The renamer uses the Visited set to avoid infinite loops. Clear it now.
+ Visited.clear();
+
+ // Remove the allocas themselves from the function.
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+ Instruction *A = Allocas[i];
+
+ // If there are any uses of the alloca instructions left, they must be in
+ // sections of dead code that were not processed on the dominance frontier.
+ // Just delete the users now.
+ //
+ if (!A->use_empty())
+ A->replaceAllUsesWith(UndefValue::get(A->getType()));
+ if (AST) AST->deleteValue(A);
+ A->eraseFromParent();
+ }
+
+ // Remove alloca's dbg.declare instrinsics from the function.
+ for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
+ DDI->eraseFromParent();
+
+ // Loop over all of the PHI nodes and see if there are any that we can get
+ // rid of because they merge all of the same incoming values. This can
+ // happen due to undef values coming into the PHI nodes. This process is
+ // iterative, because eliminating one PHI node can cause others to be removed.
+ bool EliminatedAPHI = true;
+ while (EliminatedAPHI) {
+ EliminatedAPHI = false;
+
+ for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+ NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
+ PHINode *PN = I->second;
+
+ // If this PHI node merges one value and/or undefs, get the value.
+ if (Value *V = PN->hasConstantValue(&DT)) {
+ if (AST && PN->getType()->isPointerTy())
+ AST->deleteValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ NewPhiNodes.erase(I++);
+ EliminatedAPHI = true;
+ continue;
+ }
+ ++I;
+ }
+ }
+
+ // At this point, the renamer has added entries to PHI nodes for all reachable
+ // code. Unfortunately, there may be unreachable blocks which the renamer
+ // hasn't traversed. If this is the case, the PHI nodes may not
+ // have incoming values for all predecessors. Loop over all PHI nodes we have
+ // created, inserting undef values if they are missing any incoming values.
+ //
+ for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+ NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
+ // We want to do this once per basic block. As such, only process a block
+ // when we find the PHI that is the first entry in the block.
+ PHINode *SomePHI = I->second;
+ BasicBlock *BB = SomePHI->getParent();
+ if (&BB->front() != SomePHI)
+ continue;
+
+ // Only do work here if there the PHI nodes are missing incoming values. We
+ // know that all PHI nodes that were inserted in a block will have the same
+ // number of incoming values, so we can just check any of them.
+ if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
+ continue;
+
+ // Get the preds for BB.
+ SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+
+ // Ok, now we know that all of the PHI nodes are missing entries for some
+ // basic blocks. Start by sorting the incoming predecessors for efficient
+ // access.
+ std::sort(Preds.begin(), Preds.end());
+
+ // Now we loop through all BB's which have entries in SomePHI and remove
+ // them from the Preds list.
+ for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
+ // Do a log(n) search of the Preds list for the entry we want.
+ SmallVector<BasicBlock*, 16>::iterator EntIt =
+ std::lower_bound(Preds.begin(), Preds.end(),
+ SomePHI->getIncomingBlock(i));
+ assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
+ "PHI node has entry for a block which is not a predecessor!");
+
+ // Remove the entry
+ Preds.erase(EntIt);
+ }
+
+ // At this point, the blocks left in the preds list must have dummy
+ // entries inserted into every PHI nodes for the block. Update all the phi
+ // nodes in this block that we are inserting (there could be phis before
+ // mem2reg runs).
+ unsigned NumBadPreds = SomePHI->getNumIncomingValues();
+ BasicBlock::iterator BBI = BB->begin();
+ while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
+ SomePHI->getNumIncomingValues() == NumBadPreds) {
+ Value *UndefVal = UndefValue::get(SomePHI->getType());
+ for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
+ SomePHI->addIncoming(UndefVal, Preds[pred]);
+ }
+ }
+
+ NewPhiNodes.clear();
+}
+
+
+/// ComputeLiveInBlocks - Determine which blocks the value is live in. These
+/// are blocks which lead to uses. Knowing this allows us to avoid inserting
+/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
+/// would be dead).
+void PromoteMem2Reg::
+ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
+ SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {
+
+ // To determine liveness, we must iterate through the predecessors of blocks
+ // where the def is live. Blocks are added to the worklist if we need to
+ // check their predecessors. Start with all the using blocks.
+ SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
+ Info.UsingBlocks.end());
+
+ // If any of the using blocks is also a definition block, check to see if the
+ // definition occurs before or after the use. If it happens before the use,
+ // the value isn't really live-in.
+ for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
+ BasicBlock *BB = LiveInBlockWorklist[i];
+ if (!DefBlocks.count(BB)) continue;
+
+ // Okay, this is a block that both uses and defines the value. If the first
+ // reference to the alloca is a def (store), then we know it isn't live-in.
+ for (BasicBlock::iterator I = BB->begin(); ; ++I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getOperand(1) != AI) continue;
+
+ // We found a store to the alloca before a load. The alloca is not
+ // actually live-in here.
+ LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
+ LiveInBlockWorklist.pop_back();
+ --i, --e;
+ break;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (LI->getOperand(0) != AI) continue;
+
+ // Okay, we found a load before a store to the alloca. It is actually
+ // live into this block.
+ break;
+ }
+ }
+ }
+
+ // Now that we have a set of blocks where the phi is live-in, recursively add
+ // their predecessors until we find the full region the value is live.
+ while (!LiveInBlockWorklist.empty()) {
+ BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
+
+ // The block really is live in here, insert it into the set. If already in
+ // the set, then it has already been processed.
+ if (!LiveInBlocks.insert(BB))
+ continue;
+
+ // Since the value is live into BB, it is either defined in a predecessor or
+ // live into it to. Add the preds to the worklist unless they are a
+ // defining block.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+
+ // The value is not live into a predecessor if it defines the value.
+ if (DefBlocks.count(P))
+ continue;
+
+ // Otherwise it is, add to the worklist.
+ LiveInBlockWorklist.push_back(P);
+ }
+ }
+}
+
+/// DetermineInsertionPoint - At this point, we're committed to promoting the
+/// alloca using IDF's, and the standard SSA construction algorithm. Determine
+/// which blocks need phi nodes and see if we can optimize out some work by
+/// avoiding insertion of dead phi nodes.
+void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+ AllocaInfo &Info) {
+
+ // Unique the set of defining blocks for efficient lookup.
+ SmallPtrSet<BasicBlock*, 32> DefBlocks;
+ DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+
+ // Determine which blocks the value is live in. These are blocks which lead
+ // to uses.
+ SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
+ ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
+
+ // Compute the locations where PhiNodes need to be inserted. Look at the
+ // dominance frontier of EACH basic-block we have a write in.
+ unsigned CurrentVersion = 0;
+ SmallPtrSet<PHINode*, 16> InsertedPHINodes;
+ std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
+ while (!Info.DefiningBlocks.empty()) {
+ BasicBlock *BB = Info.DefiningBlocks.back();
+ Info.DefiningBlocks.pop_back();
+
+ // Look up the DF for this write, add it to defining blocks.
+ DominanceFrontier::const_iterator it = DF.find(BB);
+ if (it == DF.end()) continue;
+
+ const DominanceFrontier::DomSetType &S = it->second;
+
+ // In theory we don't need the indirection through the DFBlocks vector.
+ // In practice, the order of calling QueuePhiNode would depend on the
+ // (unspecified) ordering of basic blocks in the dominance frontier,
+ // which would give PHI nodes non-determinstic subscripts. Fix this by
+ // processing blocks in order of the occurance in the function.
+ for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
+ PE = S.end(); P != PE; ++P) {
+ // If the frontier block is not in the live-in set for the alloca, don't
+ // bother processing it.
+ if (!LiveInBlocks.count(*P))
+ continue;
+
+ DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
+ }
+
+ // Sort by which the block ordering in the function.
+ if (DFBlocks.size() > 1)
+ std::sort(DFBlocks.begin(), DFBlocks.end());
+
+ for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
+ BasicBlock *BB = DFBlocks[i].second;
+ if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes))
+ Info.DefiningBlocks.push_back(BB);
+ }
+ DFBlocks.clear();
+ }
+}
+
+/// RewriteSingleStoreAlloca - If there is only a single store to this value,
+/// replace any loads of it that are directly dominated by the definition with
+/// the value stored.
+void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
+ AllocaInfo &Info,
+ LargeBlockInfo &LBI) {
+ StoreInst *OnlyStore = Info.OnlyStore;
+ bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
+ BasicBlock *StoreBB = OnlyStore->getParent();
+ int StoreIndex = -1;
+
+ // Clear out UsingBlocks. We will reconstruct it here if needed.
+ Info.UsingBlocks.clear();
+
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
+ Instruction *UserInst = cast<Instruction>(*UI++);
+ if (!isa<LoadInst>(UserInst)) {
+ assert(UserInst == OnlyStore && "Should only have load/stores");
+ continue;
+ }
+ LoadInst *LI = cast<LoadInst>(UserInst);
+
+ // Okay, if we have a load from the alloca, we want to replace it with the
+ // only value stored to the alloca. We can do this if the value is
+ // dominated by the store. If not, we use the rest of the mem2reg machinery
+ // to insert the phi nodes as needed.
+ if (!StoringGlobalVal) { // Non-instructions are always dominated.
+ if (LI->getParent() == StoreBB) {
+ // If we have a use that is in the same block as the store, compare the
+ // indices of the two instructions to see which one came first. If the
+ // load came before the store, we can't handle it.
+ if (StoreIndex == -1)
+ StoreIndex = LBI.getInstructionIndex(OnlyStore);
+
+ if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
+ // Can't handle this load, bail out.
+ Info.UsingBlocks.push_back(StoreBB);
+ continue;
+ }
+
+ } else if (LI->getParent() != StoreBB &&
+ !dominates(StoreBB, LI->getParent())) {
+ // If the load and store are in different blocks, use BB dominance to
+ // check their relationships. If the store doesn't dom the use, bail
+ // out.
+ Info.UsingBlocks.push_back(LI->getParent());
+ continue;
+ }
+ }
+
+ // Otherwise, we *can* safely rewrite this load.
+ Value *ReplVal = OnlyStore->getOperand(0);
+ // If the replacement value is the load, this must occur in unreachable
+ // code.
+ if (ReplVal == LI)
+ ReplVal = UndefValue::get(LI->getType());
+ LI->replaceAllUsesWith(ReplVal);
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+}
+
+namespace {
+
+/// StoreIndexSearchPredicate - This is a helper predicate used to search by the
+/// first element of a pair.
+struct StoreIndexSearchPredicate {
+ bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
+ const std::pair<unsigned, StoreInst*> &RHS) {
+ return LHS.first < RHS.first;
+ }
+};
+
+}
+
+/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
+/// block. If this is the case, avoid traversing the CFG and inserting a lot of
+/// potentially useless PHI nodes by just performing a single linear pass over
+/// the basic block using the Alloca.
+///
+/// If we cannot promote this alloca (because it is read before it is written),
+/// return true. This is necessary in cases where, due to control flow, the
+/// alloca is potentially undefined on some control flow paths. e.g. code like
+/// this is potentially correct:
+///
+/// for (...) { if (c) { A = undef; undef = B; } }
+///
+/// ... so long as A is not used before undef is set.
+///
+void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI) {
+ // The trickiest case to handle is when we have large blocks. Because of this,
+ // this code is optimized assuming that large blocks happen. This does not
+ // significantly pessimize the small block case. This uses LargeBlockInfo to
+ // make it efficient to get the index of various operations in the block.
+
+ // Clear out UsingBlocks. We will reconstruct it here if needed.
+ Info.UsingBlocks.clear();
+
+ // Walk the use-def list of the alloca, getting the locations of all stores.
+ typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
+ StoresByIndexTy StoresByIndex;
+
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
+
+ // If there are no stores to the alloca, just replace any loads with undef.
+ if (StoresByIndex.empty()) {
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
+ if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
+ LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LBI.deleteValue(LI);
+ LI->eraseFromParent();
+ }
+ return;
+ }
+
+ // Sort the stores by their index, making it efficient to do a lookup with a
+ // binary search.
+ std::sort(StoresByIndex.begin(), StoresByIndex.end());
+
+ // Walk all of the loads from this alloca, replacing them with the nearest
+ // store above them, if any.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI++);
+ if (!LI) continue;
+
+ unsigned LoadIdx = LBI.getInstructionIndex(LI);
+
+ // Find the nearest store that has a lower than this load.
+ StoresByIndexTy::iterator I =
+ std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
+ std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)),
+ StoreIndexSearchPredicate());
+
+ // If there is no store before this load, then we can't promote this load.
+ if (I == StoresByIndex.begin()) {
+ // Can't handle this load, bail out.
+ Info.UsingBlocks.push_back(LI->getParent());
+ continue;
+ }
+
+ // Otherwise, there was a store before this load, the load takes its value.
+ --I;
+ LI->replaceAllUsesWith(I->second->getOperand(0));
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+}
+
+// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+// that has an associated llvm.dbg.decl intrinsic.
+void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ StoreInst *SI) {
+ DIVariable DIVar(DDI->getVariable());
+ if (!DIVar.Verify())
+ return;
+
+ if (!DIF)
+ DIF = new DIFactory(*SI->getParent()->getParent()->getParent());
+ Instruction *DbgVal = DIF->InsertDbgValueIntrinsic(SI->getOperand(0), 0,
+ DIVar, SI);
+
+ // Propagate any debug metadata from the store onto the dbg.value.
+ DebugLoc SIDL = SI->getDebugLoc();
+ if (!SIDL.isUnknown())
+ DbgVal->setDebugLoc(SIDL);
+ // Otherwise propagate debug metadata from dbg.declare.
+ else
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
+}
+
+// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
+// Alloca returns true if there wasn't already a phi-node for that variable
+//
+bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
+ unsigned &Version,
+ SmallPtrSet<PHINode*, 16> &InsertedPHINodes) {
+ // Look up the basic-block in question.
+ PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];
+
+ // If the BB already has a phi node added for the i'th alloca then we're done!
+ if (PN) return false;
+
+ // Create a PhiNode using the dereferenced type... and add the phi-node to the
+ // BasicBlock.
+ PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(),
+ Allocas[AllocaNo]->getName() + "." + Twine(Version++),
+ BB->begin());
+ ++NumPHIInsert;
+ PhiToAllocaMap[PN] = AllocaNo;
+ PN->reserveOperandSpace(getNumPreds(BB));
+
+ InsertedPHINodes.insert(PN);
+
+ if (AST && PN->getType()->isPointerTy())
+ AST->copyValue(PointerAllocaValues[AllocaNo], PN);
+
+ return true;
+}
+
+// RenamePass - Recursively traverse the CFG of the function, renaming loads and
+// stores to the allocas which we are promoting. IncomingVals indicates what
+// value each Alloca contains on exit from the predecessor block Pred.
+//
+void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncomingVals,
+ std::vector<RenamePassData> &Worklist) {
+NextIteration:
+ // If we are inserting any phi nodes into this BB, they will already be in the
+ // block.
+ if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
+ // If we have PHI nodes to update, compute the number of edges from Pred to
+ // BB.
+ if (PhiToAllocaMap.count(APN)) {
+ // We want to be able to distinguish between PHI nodes being inserted by
+ // this invocation of mem2reg from those phi nodes that already existed in
+ // the IR before mem2reg was run. We determine that APN is being inserted
+ // because it is missing incoming edges. All other PHI nodes being
+ // inserted by this pass of mem2reg will have the same number of incoming
+ // operands so far. Remember this count.
+ unsigned NewPHINumOperands = APN->getNumOperands();
+
+ unsigned NumEdges = 0;
+ for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
+ if (*I == BB)
+ ++NumEdges;
+ assert(NumEdges && "Must be at least one edge from Pred to BB!");
+
+ // Add entries for all the phis.
+ BasicBlock::iterator PNI = BB->begin();
+ do {
+ unsigned AllocaNo = PhiToAllocaMap[APN];
+
+ // Add N incoming values to the PHI node.
+ for (unsigned i = 0; i != NumEdges; ++i)
+ APN->addIncoming(IncomingVals[AllocaNo], Pred);
+
+ // The currently active variable for this block is now the PHI.
+ IncomingVals[AllocaNo] = APN;
+
+ // Get the next phi node.
+ ++PNI;
+ APN = dyn_cast<PHINode>(PNI);
+ if (APN == 0) break;
+
+ // Verify that it is missing entries. If not, it is not being inserted
+ // by this mem2reg invocation so we want to ignore it.
+ } while (APN->getNumOperands() == NewPHINumOperands);
+ }
+ }
+
+ // Don't revisit blocks.
+ if (!Visited.insert(BB)) return;
+
+ for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
+ Instruction *I = II++; // get the instruction, increment iterator
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
+ if (!Src) continue;
+
+ std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
+ if (AI == AllocaLookup.end()) continue;
+
+ Value *V = IncomingVals[AI->second];
+
+ // Anything using the load now uses the current value.
+ LI->replaceAllUsesWith(V);
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+ BB->getInstList().erase(LI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Delete this instruction and mark the name as the current holder of the
+ // value
+ AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
+ if (!Dest) continue;
+
+ std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
+ if (ai == AllocaLookup.end())
+ continue;
+
+ // what value were we writing?
+ IncomingVals[ai->second] = SI->getOperand(0);
+ // Record debuginfo for the store before removing it.
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
+ ConvertDebugDeclareToDebugValue(DDI, SI);
+ BB->getInstList().erase(SI);
+ }
+ }
+
+ // 'Recurse' to our successors.
+ succ_iterator I = succ_begin(BB), E = succ_end(BB);
+ if (I == E) return;
+
+ // Keep track of the successors so we don't visit the same successor twice
+ SmallPtrSet<BasicBlock*, 8> VisitedSuccs;
+
+ // Handle the first successor without using the worklist.
+ VisitedSuccs.insert(*I);
+ Pred = BB;
+ BB = *I;
+ ++I;
+
+ for (; I != E; ++I)
+ if (VisitedSuccs.insert(*I))
+ Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));
+
+ goto NextIteration;
+}
+
+/// PromoteMemToReg - Promote the specified list of alloca instructions into
+/// scalar registers, inserting PHI nodes as appropriate. This function makes
+/// use of DominanceFrontier information. This function does not modify the CFG
+/// of the function at all. All allocas must be from the same function.
+///
+/// If AST is specified, the specified tracker is updated to reflect changes
+/// made to the IR.
+///
+void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
+ DominatorTree &DT, DominanceFrontier &DF,
+ AliasSetTracker *AST) {
+ // If there is nothing to do, bail out...
+ if (Allocas.empty()) return;
+
+ PromoteMem2Reg(Allocas, DT, DF, AST).run();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
new file mode 100644
index 0000000..c855988
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -0,0 +1,344 @@
+//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ssaupdater"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+using namespace llvm;
+
+typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
+ : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
+
+SSAUpdater::~SSAUpdater() {
+ delete &getAvailableVals(AV);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates with type 'Ty'. PHI nodes get a name based on 'Name'.
+void SSAUpdater::Initialize(const Type *Ty, StringRef Name) {
+ if (AV == 0)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+ ProtoType = Ty;
+ ProtoName = Name;
+}
+
+/// HasValueForBlock - Return true if the SSAUpdater already has a value for
+/// the specified block.
+bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
+ assert(ProtoType != 0 && "Need to initialize SSAUpdater");
+ assert(ProtoType == V->getType() &&
+ "All rewritten values must have the same type");
+ getAvailableVals(AV)[BB] = V;
+}
+
+/// IsEquivalentPHI - Check if PHI has the same incoming value as specified
+/// in ValueMapping for each predecessor block.
+static bool IsEquivalentPHI(PHINode *PHI,
+ DenseMap<BasicBlock*, Value*> &ValueMapping) {
+ unsigned PHINumValues = PHI->getNumIncomingValues();
+ if (PHINumValues != ValueMapping.size())
+ return false;
+
+ // Scan the phi to see if it matches.
+ for (unsigned i = 0, e = PHINumValues; i != e; ++i)
+ if (ValueMapping[PHI->getIncomingBlock(i)] !=
+ PHI->getIncomingValue(i)) {
+ return false;
+ }
+
+ return true;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
+ Value *Res = GetValueAtEndOfBlockInternal(BB);
+ return Res;
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+/// X1 = ...
+/// SomeBB:
+/// use(X)
+/// X2 = ...
+/// br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!HasValueForBlock(BB))
+ return GetValueAtEndOfBlock(BB);
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
+ Value *SingularValue = 0;
+
+ // We can get our predecessor info by walking the pred_iterator list, but it
+ // is relatively slow. If we already have PHI nodes in this block, walk one
+ // of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (i == 0)
+ SingularValue = PredVal;
+ else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+ } else {
+ bool isFirstPred = true;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBB = *PI;
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+ }
+
+ // If there are no predecessors, just return undef.
+ if (PredValues.empty())
+ return UndefValue::get(ProtoType);
+
+ // Otherwise, if all the merged values are the same, just use it.
+ if (SingularValue != 0)
+ return SingularValue;
+
+ // Otherwise, we do need a PHI: check to see if we already have one available
+ // in this block that produces the right value.
+ if (isa<PHINode>(BB->begin())) {
+ DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(),
+ PredValues.end());
+ PHINode *SomePHI;
+ for (BasicBlock::iterator It = BB->begin();
+ (SomePHI = dyn_cast<PHINode>(It)); ++It) {
+ if (IsEquivalentPHI(SomePHI, ValueMapping))
+ return SomePHI;
+ }
+ }
+
+ // Ok, we have no way out, insert a new one now.
+ PHINode *InsertedPHI = PHINode::Create(ProtoType, ProtoName, &BB->front());
+ InsertedPHI->reserveOperandSpace(PredValues.size());
+
+ // Fill in all the predecessors of the PHI.
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
+ InsertedPHI->eraseFromParent();
+ return ConstVal;
+ }
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI;
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void SSAUpdater::RewriteUse(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueInMiddleOfBlock(User->getParent());
+
+ U.set(V);
+}
+
+/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However,
+/// this version of the method can rewrite uses in the same block as a
+/// definition, because it assumes that all uses of a value are below any
+/// inserted values.
+void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueAtEndOfBlock(User->getParent());
+
+ U.set(V);
+}
+
+/// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator
+/// in the SSAUpdaterImpl template.
+namespace {
+ class PHIiter {
+ private:
+ PHINode *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHIiter(PHINode *P) // begin iterator
+ : PHI(P), idx(0) {}
+ PHIiter(PHINode *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumIncomingValues()) {}
+
+ PHIiter &operator++() { ++idx; return *this; }
+ bool operator==(const PHIiter& x) const { return idx == x.idx; }
+ bool operator!=(const PHIiter& x) const { return !operator==(x); }
+ Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
+ BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
+ };
+}
+
+/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template,
+/// specialized for SSAUpdater.
+namespace llvm {
+template<>
+class SSAUpdaterTraits<SSAUpdater> {
+public:
+ typedef BasicBlock BlkT;
+ typedef Value *ValT;
+ typedef PHINode PhiT;
+
+ typedef succ_iterator BlkSucc_iterator;
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); }
+
+ typedef PHIiter PHI_iterator;
+ static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static inline PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+ /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds
+ /// vector, set Info->NumPreds, and allocate space in Info->Preds.
+ static void FindPredecessorBlocks(BasicBlock *BB,
+ SmallVectorImpl<BasicBlock*> *Preds) {
+ // We can get our predecessor info by walking the pred_iterator list,
+ // but it is relatively slow. If we already have PHI nodes in this
+ // block, walk one of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI)
+ Preds->push_back(SomePhi->getIncomingBlock(PI));
+ } else {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Preds->push_back(*PI);
+ }
+ }
+
+ /// GetUndefVal - Get an undefined value of the same type as the value
+ /// being handled.
+ static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) {
+ return UndefValue::get(Updater->ProtoType);
+ }
+
+ /// CreateEmptyPHI - Create a new PHI instruction in the specified block.
+ /// Reserve space for the operands but do not fill them in yet.
+ static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
+ SSAUpdater *Updater) {
+ PHINode *PHI = PHINode::Create(Updater->ProtoType, Updater->ProtoName,
+ &BB->front());
+ PHI->reserveOperandSpace(NumPreds);
+ return PHI;
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) {
+ PHI->addIncoming(Val, Pred);
+ }
+
+ /// InstrIsPHI - Check if an instruction is a PHI.
+ ///
+ static PHINode *InstrIsPHI(Instruction *I) {
+ return dyn_cast<PHINode>(I);
+ }
+
+ /// ValueIsPHI - Check if a value is a PHI.
+ ///
+ static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) {
+ return dyn_cast<PHINode>(Val);
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) {
+ PHINode *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumIncomingValues() == 0)
+ return PHI;
+ return 0;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the value
+ /// that it defines.
+ static Value *GetPHIValue(PHINode *PHI) {
+ return PHI;
+ }
+};
+
+} // End llvm namespace
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (Value *V = AvailableVals[BB])
+ return V;
+
+ SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
new file mode 100644
index 0000000..28d7afb
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -0,0 +1,2207 @@
+//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Peephole optimize the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+#include <functional>
+#include <set>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumSpeculations, "Number of speculative executed instructions");
+
+namespace {
+class SimplifyCFGOpt {
+ const TargetData *const TD;
+
+ ConstantInt *GetConstantInt(Value *V);
+ Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values);
+ Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values);
+ bool GatherValueComparisons(Instruction *Cond, Value *&CompVal,
+ std::vector<ConstantInt*> &Values);
+ Value *isValueEqualityComparison(TerminatorInst *TI);
+ BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases);
+ bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ BasicBlock *Pred);
+ bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI);
+
+public:
+ explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {}
+ bool run(BasicBlock *BB);
+};
+}
+
+/// SafeToMergeTerminators - Return true if it is safe to merge these two
+/// terminator instructions together.
+///
+static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
+ if (SI1 == SI2) return false; // Can't merge with self!
+
+ // It is not safe to merge these two switch instructions if they have a common
+ // successor, and if that successor has a PHI node, and if *that* PHI node has
+ // conflicting incoming values from the two switch blocks.
+ BasicBlock *SI1BB = SI1->getParent();
+ BasicBlock *SI2BB = SI2->getParent();
+ SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+
+ for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
+ if (SI1Succs.count(*I))
+ for (BasicBlock::iterator BBI = (*I)->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ if (PN->getIncomingValueForBlock(SI1BB) !=
+ PN->getIncomingValueForBlock(SI2BB))
+ return false;
+ }
+
+ return true;
+}
+
+/// AddPredecessorToBlock - Update PHI nodes in Succ to indicate that there will
+/// now be entries in it from the 'NewPred' block. The values that will be
+/// flowing into the PHI nodes will be the same as those coming in from
+/// ExistPred, an existing predecessor of Succ.
+static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
+ BasicBlock *ExistPred) {
+ assert(std::find(succ_begin(ExistPred), succ_end(ExistPred), Succ) !=
+ succ_end(ExistPred) && "ExistPred is not a predecessor of Succ!");
+ if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
+
+ PHINode *PN;
+ for (BasicBlock::iterator I = Succ->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I)
+ PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
+}
+
+
+/// GetIfCondition - Given a basic block (BB) with two predecessors (and
+/// presumably PHI nodes in it), check to see if the merge at this block is due
+/// to an "if condition". If so, return the boolean condition that determines
+/// which entry into BB will be taken. Also, return by references the block
+/// that will be entered from if the condition is true, and the block that will
+/// be entered if the condition is false.
+///
+///
+static Value *GetIfCondition(BasicBlock *BB,
+ BasicBlock *&IfTrue, BasicBlock *&IfFalse) {
+ assert(std::distance(pred_begin(BB), pred_end(BB)) == 2 &&
+ "Function can only handle blocks with 2 predecessors!");
+ BasicBlock *Pred1 = *pred_begin(BB);
+ BasicBlock *Pred2 = *++pred_begin(BB);
+
+ // We can only handle branches. Other control flow will be lowered to
+ // branches if possible anyway.
+ if (!isa<BranchInst>(Pred1->getTerminator()) ||
+ !isa<BranchInst>(Pred2->getTerminator()))
+ return 0;
+ BranchInst *Pred1Br = cast<BranchInst>(Pred1->getTerminator());
+ BranchInst *Pred2Br = cast<BranchInst>(Pred2->getTerminator());
+
+ // Eliminate code duplication by ensuring that Pred1Br is conditional if
+ // either are.
+ if (Pred2Br->isConditional()) {
+ // If both branches are conditional, we don't have an "if statement". In
+ // reality, we could transform this case, but since the condition will be
+ // required anyway, we stand no chance of eliminating it, so the xform is
+ // probably not profitable.
+ if (Pred1Br->isConditional())
+ return 0;
+
+ std::swap(Pred1, Pred2);
+ std::swap(Pred1Br, Pred2Br);
+ }
+
+ if (Pred1Br->isConditional()) {
+ // If we found a conditional branch predecessor, make sure that it branches
+ // to BB and Pred2Br. If it doesn't, this isn't an "if statement".
+ if (Pred1Br->getSuccessor(0) == BB &&
+ Pred1Br->getSuccessor(1) == Pred2) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else if (Pred1Br->getSuccessor(0) == Pred2 &&
+ Pred1Br->getSuccessor(1) == BB) {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ } else {
+ // We know that one arm of the conditional goes to BB, so the other must
+ // go somewhere unrelated, and this must not be an "if statement".
+ return 0;
+ }
+
+ // The only thing we have to watch out for here is to make sure that Pred2
+ // doesn't have incoming edges from other blocks. If it does, the condition
+ // doesn't dominate BB.
+ if (++pred_begin(Pred2) != pred_end(Pred2))
+ return 0;
+
+ return Pred1Br->getCondition();
+ }
+
+ // Ok, if we got here, both predecessors end with an unconditional branch to
+ // BB. Don't panic! If both blocks only have a single (identical)
+ // predecessor, and THAT is a conditional branch, then we're all ok!
+ if (pred_begin(Pred1) == pred_end(Pred1) ||
+ ++pred_begin(Pred1) != pred_end(Pred1) ||
+ pred_begin(Pred2) == pred_end(Pred2) ||
+ ++pred_begin(Pred2) != pred_end(Pred2) ||
+ *pred_begin(Pred1) != *pred_begin(Pred2))
+ return 0;
+
+ // Otherwise, if this is a conditional branch, then we can use it!
+ BasicBlock *CommonPred = *pred_begin(Pred1);
+ if (BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator())) {
+ assert(BI->isConditional() && "Two successors but not conditional?");
+ if (BI->getSuccessor(0) == Pred1) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ }
+ return BI->getCondition();
+ }
+ return 0;
+}
+
+/// DominatesMergePoint - If we have a merge point of an "if condition" as
+/// accepted above, return true if the specified value dominates the block. We
+/// don't handle the true generality of domination here, just a special case
+/// which works well enough for us.
+///
+/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
+/// see if V (which must be an instruction) is cheap to compute and is
+/// non-trapping. If both are true, the instruction is inserted into the set
+/// and true is returned.
+static bool DominatesMergePoint(Value *V, BasicBlock *BB,
+ std::set<Instruction*> *AggressiveInsts) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ // Non-instructions all dominate instructions, but not all constantexprs
+ // can be executed unconditionally.
+ if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
+ if (C->canTrap())
+ return false;
+ return true;
+ }
+ BasicBlock *PBB = I->getParent();
+
+ // We don't want to allow weird loops that might have the "if condition" in
+ // the bottom of this block.
+ if (PBB == BB) return false;
+
+ // If this instruction is defined in a block that contains an unconditional
+ // branch to BB, then it must be in the 'conditional' part of the "if
+ // statement".
+ if (BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()))
+ if (BI->isUnconditional() && BI->getSuccessor(0) == BB) {
+ if (!AggressiveInsts) return false;
+ // Okay, it looks like the instruction IS in the "condition". Check to
+ // see if it's a cheap instruction to unconditionally compute, and if it
+ // only uses stuff defined outside of the condition. If so, hoist it out.
+ if (!I->isSafeToSpeculativelyExecute())
+ return false;
+
+ switch (I->getOpcode()) {
+ default: return false; // Cannot hoist this out safely.
+ case Instruction::Load: {
+ // We have to check to make sure there are no instructions before the
+ // load in its basic block, as we are going to hoist the loop out to
+ // its predecessor.
+ BasicBlock::iterator IP = PBB->begin();
+ while (isa<DbgInfoIntrinsic>(IP))
+ IP++;
+ if (IP != BasicBlock::iterator(I))
+ return false;
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::ICmp:
+ break; // These are all cheap and non-trapping instructions.
+ }
+
+ // Okay, we can only really hoist these out if their operands are not
+ // defined in the conditional region.
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (!DominatesMergePoint(*i, BB, 0))
+ return false;
+ // Okay, it's safe to do this! Remember this instruction.
+ AggressiveInsts->insert(I);
+ }
+
+ return true;
+}
+
+/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
+/// and PointerNullValue. Return NULL if value is not a constant int.
+ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) {
+ // Normal constant int.
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
+ if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
+ return CI;
+
+ // This is some kind of pointer constant. Turn it into a pointer-sized
+ // ConstantInt if possible.
+ const IntegerType *PtrTy = TD->getIntPtrType(V->getContext());
+
+ // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
+ if (isa<ConstantPointerNull>(V))
+ return ConstantInt::get(PtrTy, 0);
+
+ // IntToPtr const int.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
+ // The constant is very likely to have the right type already.
+ if (CI->getType() == PtrTy)
+ return CI;
+ else
+ return cast<ConstantInt>
+ (ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
+ }
+ return 0;
+}
+
+/// GatherConstantSetEQs - Given a potentially 'or'd together collection of
+/// icmp_eq instructions that compare a value against a constant, return the
+/// value being compared, and stick the constant into the Values vector.
+Value *SimplifyCFGOpt::
+GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values) {
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ if (Inst->getOpcode() == Instruction::ICmp &&
+ cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_EQ) {
+ if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) {
+ Values.push_back(C);
+ return Inst->getOperand(0);
+ } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) {
+ Values.push_back(C);
+ return Inst->getOperand(1);
+ }
+ } else if (Inst->getOpcode() == Instruction::Or) {
+ if (Value *LHS = GatherConstantSetEQs(Inst->getOperand(0), Values))
+ if (Value *RHS = GatherConstantSetEQs(Inst->getOperand(1), Values))
+ if (LHS == RHS)
+ return LHS;
+ }
+ }
+ return 0;
+}
+
+/// GatherConstantSetNEs - Given a potentially 'and'd together collection of
+/// setne instructions that compare a value against a constant, return the value
+/// being compared, and stick the constant into the Values vector.
+Value *SimplifyCFGOpt::
+GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values) {
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ if (Inst->getOpcode() == Instruction::ICmp &&
+ cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_NE) {
+ if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) {
+ Values.push_back(C);
+ return Inst->getOperand(0);
+ } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) {
+ Values.push_back(C);
+ return Inst->getOperand(1);
+ }
+ } else if (Inst->getOpcode() == Instruction::And) {
+ if (Value *LHS = GatherConstantSetNEs(Inst->getOperand(0), Values))
+ if (Value *RHS = GatherConstantSetNEs(Inst->getOperand(1), Values))
+ if (LHS == RHS)
+ return LHS;
+ }
+ }
+ return 0;
+}
+
+/// GatherValueComparisons - If the specified Cond is an 'and' or 'or' of a
+/// bunch of comparisons of one value against constants, return the value and
+/// the constants being compared.
+bool SimplifyCFGOpt::GatherValueComparisons(Instruction *Cond, Value *&CompVal,
+ std::vector<ConstantInt*> &Values) {
+ if (Cond->getOpcode() == Instruction::Or) {
+ CompVal = GatherConstantSetEQs(Cond, Values);
+
+ // Return true to indicate that the condition is true if the CompVal is
+ // equal to one of the constants.
+ return true;
+ } else if (Cond->getOpcode() == Instruction::And) {
+ CompVal = GatherConstantSetNEs(Cond, Values);
+
+ // Return false to indicate that the condition is false if the CompVal is
+ // equal to one of the constants.
+ return false;
+ }
+ return false;
+}
+
+static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
+ Instruction* Cond = 0;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Cond = dyn_cast<Instruction>(SI->getCondition());
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional())
+ Cond = dyn_cast<Instruction>(BI->getCondition());
+ }
+
+ TI->eraseFromParent();
+ if (Cond) RecursivelyDeleteTriviallyDeadInstructions(Cond);
+}
+
+/// isValueEqualityComparison - Return true if the specified terminator checks
+/// to see if a value is equal to constant integer value.
+Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
+ Value *CV = 0;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ // Do not permit merging of large switch instructions into their
+ // predecessors unless there is only one predecessor.
+ if (SI->getNumSuccessors()*std::distance(pred_begin(SI->getParent()),
+ pred_end(SI->getParent())) <= 128)
+ CV = SI->getCondition();
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
+ if (BI->isConditional() && BI->getCondition()->hasOneUse())
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
+ if ((ICI->getPredicate() == ICmpInst::ICMP_EQ ||
+ ICI->getPredicate() == ICmpInst::ICMP_NE) &&
+ GetConstantInt(ICI->getOperand(1)))
+ CV = ICI->getOperand(0);
+
+ // Unwrap any lossless ptrtoint cast.
+ if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext()))
+ if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV))
+ CV = PTII->getOperand(0);
+ return CV;
+}
+
+/// GetValueEqualityComparisonCases - Given a value comparison instruction,
+/// decode all of the 'cases' that it represents and return the 'default' block.
+BasicBlock *SimplifyCFGOpt::
+GetValueEqualityComparisonCases(TerminatorInst *TI,
+ std::vector<std::pair<ConstantInt*,
+ BasicBlock*> > &Cases) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Cases.reserve(SI->getNumCases());
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ Cases.push_back(std::make_pair(SI->getCaseValue(i), SI->getSuccessor(i)));
+ return SI->getDefaultDest();
+ }
+
+ BranchInst *BI = cast<BranchInst>(TI);
+ ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+ Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1)),
+ BI->getSuccessor(ICI->getPredicate() ==
+ ICmpInst::ICMP_NE)));
+ return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
+}
+
+
+/// EliminateBlockCases - Given a vector of bb/value pairs, remove any entries
+/// in the list that match the specified block.
+static void EliminateBlockCases(BasicBlock *BB,
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases) {
+ for (unsigned i = 0, e = Cases.size(); i != e; ++i)
+ if (Cases[i].second == BB) {
+ Cases.erase(Cases.begin()+i);
+ --i; --e;
+ }
+}
+
+/// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as
+/// well.
+static bool
+ValuesOverlap(std::vector<std::pair<ConstantInt*, BasicBlock*> > &C1,
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > &C2) {
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > *V1 = &C1, *V2 = &C2;
+
+ // Make V1 be smaller than V2.
+ if (V1->size() > V2->size())
+ std::swap(V1, V2);
+
+ if (V1->size() == 0) return false;
+ if (V1->size() == 1) {
+ // Just scan V2.
+ ConstantInt *TheVal = (*V1)[0].first;
+ for (unsigned i = 0, e = V2->size(); i != e; ++i)
+ if (TheVal == (*V2)[i].first)
+ return true;
+ }
+
+ // Otherwise, just sort both lists and compare element by element.
+ std::sort(V1->begin(), V1->end());
+ std::sort(V2->begin(), V2->end());
+ unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
+ while (i1 != e1 && i2 != e2) {
+ if ((*V1)[i1].first == (*V2)[i2].first)
+ return true;
+ if ((*V1)[i1].first < (*V2)[i2].first)
+ ++i1;
+ else
+ ++i2;
+ }
+ return false;
+}
+
+/// SimplifyEqualityComparisonWithOnlyPredecessor - If TI is known to be a
+/// terminator instruction and its block is known to only have a single
+/// predecessor block, check to see if that predecessor is also a value
+/// comparison with the same value, and if that comparison determines the
+/// outcome of this comparison. If so, simplify TI. This does a very limited
+/// form of jump threading.
+bool SimplifyCFGOpt::
+SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ BasicBlock *Pred) {
+ Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
+ if (!PredVal) return false; // Not a value comparison in predecessor.
+
+ Value *ThisVal = isValueEqualityComparison(TI);
+ assert(ThisVal && "This isn't a value comparison!!");
+ if (ThisVal != PredVal) return false; // Different predicates.
+
+ // Find out information about when control will move from Pred to TI's block.
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases;
+ BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(),
+ PredCases);
+ EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
+
+ // Find information about how control leaves this block.
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > ThisCases;
+ BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
+ EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
+
+ // If TI's block is the default block from Pred's comparison, potentially
+ // simplify TI based on this knowledge.
+ if (PredDef == TI->getParent()) {
+ // If we are here, we know that the value is none of those cases listed in
+ // PredCases. If there are any cases in ThisCases that are in PredCases, we
+ // can simplify TI.
+ if (ValuesOverlap(PredCases, ThisCases)) {
+ if (isa<BranchInst>(TI)) {
+ // Okay, one of the successors of this condbr is dead. Convert it to a
+ // uncond br.
+ assert(ThisCases.size() == 1 && "Branch can only have one case!");
+ // Insert the new branch.
+ Instruction *NI = BranchInst::Create(ThisDef, TI);
+ (void) NI;
+
+ // Remove PHI node entries for the dead edge.
+ ThisCases[0].second->removePredecessor(TI->getParent());
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+
+ } else {
+ SwitchInst *SI = cast<SwitchInst>(TI);
+ // Okay, TI has cases that are statically dead, prune them away.
+ SmallPtrSet<Constant*, 16> DeadCases;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ DeadCases.insert(PredCases[i].first);
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI);
+
+ for (unsigned i = SI->getNumCases()-1; i != 0; --i)
+ if (DeadCases.count(SI->getCaseValue(i))) {
+ SI->getSuccessor(i)->removePredecessor(TI->getParent());
+ SI->removeCase(i);
+ }
+
+ DEBUG(dbgs() << "Leaving: " << *TI << "\n");
+ return true;
+ }
+ }
+
+ } else {
+ // Otherwise, TI's block must correspond to some matched value. Find out
+ // which value (or set of values) this is.
+ ConstantInt *TIV = 0;
+ BasicBlock *TIBB = TI->getParent();
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second == TIBB) {
+ if (TIV == 0)
+ TIV = PredCases[i].first;
+ else
+ return false; // Cannot handle multiple values coming to this block.
+ }
+ assert(TIV && "No edge from pred to succ?");
+
+ // Okay, we found the one constant that our value can be if we get into TI's
+ // BB. Find out which successor will unconditionally be branched to.
+ BasicBlock *TheRealDest = 0;
+ for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
+ if (ThisCases[i].first == TIV) {
+ TheRealDest = ThisCases[i].second;
+ break;
+ }
+
+ // If not handled by any explicit cases, it is handled by the default case.
+ if (TheRealDest == 0) TheRealDest = ThisDef;
+
+ // Remove PHI node entries for dead edges.
+ BasicBlock *CheckEdge = TheRealDest;
+ for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
+ if (*SI != CheckEdge)
+ (*SI)->removePredecessor(TIBB);
+ else
+ CheckEdge = 0;
+
+ // Insert the new branch.
+ Instruction *NI = BranchInst::Create(TheRealDest, TI);
+ (void) NI;
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+ }
+ return false;
+}
+
+namespace {
+ /// ConstantIntOrdering - This class implements a stable ordering of constant
+ /// integers that does not depend on their address. This is important for
+ /// applications that sort ConstantInt's to ensure uniqueness.
+ struct ConstantIntOrdering {
+ bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
+ return LHS->getValue().ult(RHS->getValue());
+ }
+ };
+}
+
+/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
+/// equality comparison instruction (either a switch or a branch on "X == c").
+/// See if any of the predecessors of the terminator block are value comparisons
+/// on the same value. If so, and if safe to do so, fold them together.
+bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {
+ BasicBlock *BB = TI->getParent();
+ Value *CV = isValueEqualityComparison(TI); // CondVal
+ assert(CV && "Not a comparison?");
+ bool Changed = false;
+
+ SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.pop_back_val();
+
+ // See if the predecessor is a comparison with the same value.
+ TerminatorInst *PTI = Pred->getTerminator();
+ Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
+
+ if (PCV == CV && SafeToMergeTerminators(TI, PTI)) {
+ // Figure out which 'cases' to copy from SI to PSI.
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > BBCases;
+ BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
+
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases;
+ BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
+
+ // Based on whether the default edge from PTI goes to BB or not, fill in
+ // PredCases and PredDefault with the new switch cases we would like to
+ // build.
+ SmallVector<BasicBlock*, 8> NewSuccessors;
+
+ if (PredDefault == BB) {
+ // If this is the default destination from PTI, only the edges in TI
+ // that don't occur in PTI, or that branch to BB will be activated.
+ std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second != BB)
+ PTIHandled.insert(PredCases[i].first);
+ else {
+ // The default destination is BB, we don't need explicit targets.
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i; --e;
+ }
+
+ // Reconstruct the new switch statement we will be building.
+ if (PredDefault != BBDefault) {
+ PredDefault->removePredecessor(Pred);
+ PredDefault = BBDefault;
+ NewSuccessors.push_back(BBDefault);
+ }
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (!PTIHandled.count(BBCases[i].first) &&
+ BBCases[i].second != BBDefault) {
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].second);
+ }
+
+ } else {
+ // If this is not the default destination from PSI, only the edges
+ // in SI that occur in PSI with a destination of BB will be
+ // activated.
+ std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second == BB) {
+ PTIHandled.insert(PredCases[i].first);
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i; --e;
+ }
+
+ // Okay, now we know which constants were sent to BB from the
+ // predecessor. Figure out where they will all go now.
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (PTIHandled.count(BBCases[i].first)) {
+ // If this is one we are capable of getting...
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].second);
+ PTIHandled.erase(BBCases[i].first);// This constant is taken care of
+ }
+
+ // If there are any constants vectored to BB that TI doesn't handle,
+ // they must go to the default destination of TI.
+ for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
+ PTIHandled.begin(),
+ E = PTIHandled.end(); I != E; ++I) {
+ PredCases.push_back(std::make_pair(*I, BBDefault));
+ NewSuccessors.push_back(BBDefault);
+ }
+ }
+
+ // Okay, at this point, we know which new successor Pred will get. Make
+ // sure we update the number of entries in the PHI nodes for these
+ // successors.
+ for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i)
+ AddPredecessorToBlock(NewSuccessors[i], Pred, BB);
+
+ // Convert pointer to int before we switch.
+ if (CV->getType()->isPointerTy()) {
+ assert(TD && "Cannot switch on pointer without TargetData");
+ CV = new PtrToIntInst(CV, TD->getIntPtrType(CV->getContext()),
+ "magicptr", PTI);
+ }
+
+ // Now that the successors are updated, create the new Switch instruction.
+ SwitchInst *NewSI = SwitchInst::Create(CV, PredDefault,
+ PredCases.size(), PTI);
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ NewSI->addCase(PredCases[i].first, PredCases[i].second);
+
+ EraseTerminatorInstAndDCECond(PTI);
+
+ // Okay, last check. If BB is still a successor of PSI, then we must
+ // have an infinite loop case. If so, add an infinitely looping block
+ // to handle the case to preserve the behavior of the code.
+ BasicBlock *InfLoopBlock = 0;
+ for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
+ if (NewSI->getSuccessor(i) == BB) {
+ if (InfLoopBlock == 0) {
+ // Insert it at the end of the function, because it's either code,
+ // or it won't matter if it's hot. :)
+ InfLoopBlock = BasicBlock::Create(BB->getContext(),
+ "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ }
+ NewSI->setSuccessor(i, InfLoopBlock);
+ }
+
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+// isSafeToHoistInvoke - If we would need to insert a select that uses the
+// value of this invoke (comments in HoistThenElseCodeToIf explain why we
+// would need to do this), we can't hoist the invoke, as there is nowhere
+// to put the select in this case.
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
+ Instruction *I1, Instruction *I2) {
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
+/// BB2, hoist any common code in the two blocks up into the branch block. The
+/// caller of this function guarantees that BI's block dominates BB1 and BB2.
+static bool HoistThenElseCodeToIf(BranchInst *BI) {
+ // This does very trivial matching, with limited scanning, to find identical
+ // instructions in the two blocks. In particular, we don't want to get into
+ // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
+ // such, we currently just scan for obviously identical instructions in an
+ // identical order.
+ BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
+ BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
+
+ BasicBlock::iterator BB1_Itr = BB1->begin();
+ BasicBlock::iterator BB2_Itr = BB2->begin();
+
+ Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++;
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = BB2_Itr++;
+ if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
+ !I1->isIdenticalToWhenDefined(I2) ||
+ (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
+ return false;
+
+ // If we get here, we can hoist at least one instruction.
+ BasicBlock *BIParent = BI->getParent();
+
+ do {
+ // If we are hoisting the terminator instruction, don't move one (making a
+ // broken BB), instead clone it, and remove BI.
+ if (isa<TerminatorInst>(I1))
+ goto HoistTerminator;
+
+ // For a normal instruction, we just move one to right before the branch,
+ // then replace all uses of the other with the first. Finally, we remove
+ // the now redundant second instruction.
+ BIParent->getInstList().splice(BI, BB1->getInstList(), I1);
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->intersectOptionalDataWith(I2);
+ BB2->getInstList().erase(I2);
+
+ I1 = BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = BB1_Itr++;
+ I2 = BB2_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = BB2_Itr++;
+ } while (I1->getOpcode() == I2->getOpcode() &&
+ I1->isIdenticalToWhenDefined(I2));
+
+ return true;
+
+HoistTerminator:
+ // It may not be possible to hoist an invoke.
+ if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
+ return true;
+
+ // Okay, it is safe to hoist the terminator.
+ Instruction *NT = I1->clone();
+ BIParent->getInstList().insert(BI, NT);
+ if (!NT->getType()->isVoidTy()) {
+ I1->replaceAllUsesWith(NT);
+ I2->replaceAllUsesWith(NT);
+ NT->takeName(I1);
+ }
+
+ // Hoisting one of the terminators from our successor is a great thing.
+ // Unfortunately, the successors of the if/else blocks may have PHI nodes in
+ // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
+ // nodes, so we insert select instruction to compute the final result.
+ std::map<std::pair<Value*,Value*>, SelectInst*> InsertedSelects;
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V != BB2V) {
+ // These values do not agree. Insert a select instruction before NT
+ // that determines the right value.
+ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
+ if (SI == 0)
+ SI = SelectInst::Create(BI->getCondition(), BB1V, BB2V,
+ BB1V->getName()+"."+BB2V->getName(), NT);
+ // Make the PHI node use the select for all incoming values for BB1/BB2
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
+ PN->setIncomingValue(i, SI);
+ }
+ }
+ }
+
+ // Update any PHI nodes in our new successors.
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI)
+ AddPredecessorToBlock(*SI, BIParent, BB1);
+
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+}
+
+/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1
+/// and an BB2 and the only successor of BB1 is BB2, hoist simple code
+/// (for now, restricted to a single instruction that's side effect free) from
+/// the BB1 into the branch block to speculatively execute it.
+static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
+ // Only speculatively execution a single instruction (not counting the
+ // terminator) for now.
+ Instruction *HInst = NULL;
+ Instruction *Term = BB1->getTerminator();
+ for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end();
+ BBI != BBE; ++BBI) {
+ Instruction *I = BBI;
+ // Skip debug info.
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+ if (I == Term) break;
+
+ if (!HInst)
+ HInst = I;
+ else
+ return false;
+ }
+ if (!HInst)
+ return false;
+
+ // Be conservative for now. FP select instruction can often be expensive.
+ Value *BrCond = BI->getCondition();
+ if (isa<Instruction>(BrCond) &&
+ cast<Instruction>(BrCond)->getOpcode() == Instruction::FCmp)
+ return false;
+
+ // If BB1 is actually on the false edge of the conditional branch, remember
+ // to swap the select operands later.
+ bool Invert = false;
+ if (BB1 != BI->getSuccessor(0)) {
+ assert(BB1 == BI->getSuccessor(1) && "No edge from 'if' block?");
+ Invert = true;
+ }
+
+ // Turn
+ // BB:
+ // %t1 = icmp
+ // br i1 %t1, label %BB1, label %BB2
+ // BB1:
+ // %t3 = add %t2, c
+ // br label BB2
+ // BB2:
+ // =>
+ // BB:
+ // %t1 = icmp
+ // %t4 = add %t2, c
+ // %t3 = select i1 %t1, %t2, %t3
+ switch (HInst->getOpcode()) {
+ default: return false; // Not safe / profitable to hoist.
+ case Instruction::Add:
+ case Instruction::Sub:
+ // Not worth doing for vector ops.
+ if (HInst->getType()->isVectorTy())
+ return false;
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // Don't mess with vector operations.
+ if (HInst->getType()->isVectorTy())
+ return false;
+ break; // These are all cheap and non-trapping instructions.
+ }
+
+ // If the instruction is obviously dead, don't try to predicate it.
+ if (HInst->use_empty()) {
+ HInst->eraseFromParent();
+ return true;
+ }
+
+ // Can we speculatively execute the instruction? And what is the value
+ // if the condition is false? Consider the phi uses, if the incoming value
+ // from the "if" block are all the same V, then V is the value of the
+ // select if the condition is false.
+ BasicBlock *BIParent = BI->getParent();
+ SmallVector<PHINode*, 4> PHIUses;
+ Value *FalseV = NULL;
+
+ BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0);
+ for (Value::use_iterator UI = HInst->use_begin(), E = HInst->use_end();
+ UI != E; ++UI) {
+ // Ignore any user that is not a PHI node in BB2. These can only occur in
+ // unreachable blocks, because they would not be dominated by the instr.
+ PHINode *PN = dyn_cast<PHINode>(*UI);
+ if (!PN || PN->getParent() != BB2)
+ return false;
+ PHIUses.push_back(PN);
+
+ Value *PHIV = PN->getIncomingValueForBlock(BIParent);
+ if (!FalseV)
+ FalseV = PHIV;
+ else if (FalseV != PHIV)
+ return false; // Inconsistent value when condition is false.
+ }
+
+ assert(FalseV && "Must have at least one user, and it must be a PHI");
+
+ // Do not hoist the instruction if any of its operands are defined but not
+ // used in this BB. The transformation will prevent the operand from
+ // being sunk into the use block.
+ for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
+ i != e; ++i) {
+ Instruction *OpI = dyn_cast<Instruction>(*i);
+ if (OpI && OpI->getParent() == BIParent &&
+ !OpI->isUsedInBasicBlock(BIParent))
+ return false;
+ }
+
+ // If we get here, we can hoist the instruction. Try to place it
+ // before the icmp instruction preceding the conditional branch.
+ BasicBlock::iterator InsertPos = BI;
+ if (InsertPos != BIParent->begin())
+ --InsertPos;
+ // Skip debug info between condition and branch.
+ while (InsertPos != BIParent->begin() && isa<DbgInfoIntrinsic>(InsertPos))
+ --InsertPos;
+ if (InsertPos == BrCond && !isa<PHINode>(BrCond)) {
+ SmallPtrSet<Instruction *, 4> BB1Insns;
+ for(BasicBlock::iterator BB1I = BB1->begin(), BB1E = BB1->end();
+ BB1I != BB1E; ++BB1I)
+ BB1Insns.insert(BB1I);
+ for(Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end();
+ UI != UE; ++UI) {
+ Instruction *Use = cast<Instruction>(*UI);
+ if (BB1Insns.count(Use)) {
+ // If BrCond uses the instruction that place it just before
+ // branch instruction.
+ InsertPos = BI;
+ break;
+ }
+ }
+ } else
+ InsertPos = BI;
+ BIParent->getInstList().splice(InsertPos, BB1->getInstList(), HInst);
+
+ // Create a select whose true value is the speculatively executed value and
+ // false value is the previously determined FalseV.
+ SelectInst *SI;
+ if (Invert)
+ SI = SelectInst::Create(BrCond, FalseV, HInst,
+ FalseV->getName() + "." + HInst->getName(), BI);
+ else
+ SI = SelectInst::Create(BrCond, HInst, FalseV,
+ HInst->getName() + "." + FalseV->getName(), BI);
+
+ // Make the PHI node use the select for all incoming values for "then" and
+ // "if" blocks.
+ for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) {
+ PHINode *PN = PHIUses[i];
+ for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j)
+ if (PN->getIncomingBlock(j) == BB1 ||
+ PN->getIncomingBlock(j) == BIParent)
+ PN->setIncomingValue(j, SI);
+ }
+
+ ++NumSpeculations;
+ return true;
+}
+
+/// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch
+/// across this block.
+static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
+ BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+ unsigned Size = 0;
+
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (isa<DbgInfoIntrinsic>(BBI))
+ continue;
+ if (Size > 10) return false; // Don't clone large BB's.
+ ++Size;
+
+ // We can only support instructions that do not define values that are
+ // live outside of the current basic block.
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ Instruction *U = cast<Instruction>(*UI);
+ if (U->getParent() != BB || isa<PHINode>(U)) return false;
+ }
+
+ // Looks ok, continue checking.
+ }
+
+ return true;
+}
+
+/// FoldCondBranchOnPHI - If we have a conditional branch on a PHI node value
+/// that is defined in the same block as the branch and if any PHI entries are
+/// constants, thread edges corresponding to that entry to be branches to their
+/// ultimate destination.
+static bool FoldCondBranchOnPHI(BranchInst *BI) {
+ BasicBlock *BB = BI->getParent();
+ PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
+ // NOTE: we currently cannot transform this case if the PHI node is used
+ // outside of the block.
+ if (!PN || PN->getParent() != BB || !PN->hasOneUse())
+ return false;
+
+ // Degenerate case of a single entry PHI.
+ if (PN->getNumIncomingValues() == 1) {
+ FoldSingleEntryPHINodes(PN->getParent());
+ return true;
+ }
+
+ // Now we know that this block has multiple preds and two succs.
+ if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
+
+ // Okay, this is a simple enough basic block. See if any phi values are
+ // constants.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantInt *CB;
+ if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) &&
+ CB->getType()->isIntegerTy(1)) {
+ // Okay, we now know that all edges from PredBB should be revectored to
+ // branch to RealDest.
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+
+ if (RealDest == BB) continue; // Skip self loops.
+
+ // The dest block might have PHI nodes, other predecessors and other
+ // difficult cases. Instead of being smart about this, just insert a new
+ // block that jumps to the destination block, effectively splitting
+ // the edge we are about to create.
+ BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
+ RealDest->getName()+".critedge",
+ RealDest->getParent(), RealDest);
+ BranchInst::Create(RealDest, EdgeBB);
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = RealDest->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *V = PN->getIncomingValueForBlock(BB);
+ PN->addIncoming(V, EdgeBB);
+ }
+
+ // BB may have instructions that are being threaded over. Clone these
+ // instructions into EdgeBB. We know that there will be no uses of the
+ // cloned instructions outside of EdgeBB.
+ BasicBlock::iterator InsertPt = EdgeBB->begin();
+ std::map<Value*, Value*> TranslateMap; // Track translated values.
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
+ } else {
+ // Clone the instruction.
+ Instruction *N = BBI->clone();
+ if (BBI->hasName()) N->setName(BBI->getName()+".c");
+
+ // Update operands due to translation.
+ for (User::op_iterator i = N->op_begin(), e = N->op_end();
+ i != e; ++i) {
+ std::map<Value*, Value*>::iterator PI =
+ TranslateMap.find(*i);
+ if (PI != TranslateMap.end())
+ *i = PI->second;
+ }
+
+ // Check for trivial simplification.
+ if (Constant *C = ConstantFoldInstruction(N)) {
+ TranslateMap[BBI] = C;
+ delete N; // Constant folded away, don't need actual inst
+ } else {
+ // Insert the new instruction into its new home.
+ EdgeBB->getInstList().insert(InsertPt, N);
+ if (!BBI->use_empty())
+ TranslateMap[BBI] = N;
+ }
+ }
+ }
+
+ // Loop over all of the edges from PredBB to BB, changing them to branch
+ // to EdgeBB instead.
+ TerminatorInst *PredBBTI = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
+ if (PredBBTI->getSuccessor(i) == BB) {
+ BB->removePredecessor(PredBB);
+ PredBBTI->setSuccessor(i, EdgeBB);
+ }
+
+ // Recurse, simplifying any other constants.
+ return FoldCondBranchOnPHI(BI) | true;
+ }
+ }
+
+ return false;
+}
+
+/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
+/// PHI node, see if we can eliminate it.
+static bool FoldTwoEntryPHINode(PHINode *PN) {
+ // Ok, this is a two entry PHI node. Check to see if this is a simple "if
+ // statement", which has a very simple dominance structure. Basically, we
+ // are trying to find the condition that is being branched on, which
+ // subsequently causes this merge to happen. We really want control
+ // dependence information for this check, but simplifycfg can't keep it up
+ // to date, and this catches most of the cases we care about anyway.
+ //
+ BasicBlock *BB = PN->getParent();
+ BasicBlock *IfTrue, *IfFalse;
+ Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
+ if (!IfCond) return false;
+
+ // Okay, we found that we can merge this two-entry phi node into a select.
+ // Doing so would require us to fold *all* two entry phi nodes in this block.
+ // At some point this becomes non-profitable (particularly if the target
+ // doesn't support cmov's). Only do this transformation if there are two or
+ // fewer PHI nodes in this block.
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
+ if (NumPhis > 2)
+ return false;
+
+ DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
+ << IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
+
+ // Loop over the PHI's seeing if we can promote them all to select
+ // instructions. While we are at it, keep track of the instructions
+ // that need to be moved to the dominating block.
+ std::set<Instruction*> AggressiveInsts;
+
+ BasicBlock::iterator AfterPHIIt = BB->begin();
+ while (isa<PHINode>(AfterPHIIt)) {
+ PHINode *PN = cast<PHINode>(AfterPHIIt++);
+ if (PN->getIncomingValue(0) == PN->getIncomingValue(1)) {
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ } else if (!DominatesMergePoint(PN->getIncomingValue(0), BB,
+ &AggressiveInsts) ||
+ !DominatesMergePoint(PN->getIncomingValue(1), BB,
+ &AggressiveInsts)) {
+ return false;
+ }
+ }
+
+ // If we all PHI nodes are promotable, check to make sure that all
+ // instructions in the predecessor blocks can be promoted as well. If
+ // not, we won't be able to get rid of the control flow, so it's not
+ // worth promoting to select instructions.
+ BasicBlock *DomBlock = 0, *IfBlock1 = 0, *IfBlock2 = 0;
+ PN = cast<PHINode>(BB->begin());
+ BasicBlock *Pred = PN->getIncomingBlock(0);
+ if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) {
+ IfBlock1 = Pred;
+ DomBlock = *pred_begin(Pred);
+ for (BasicBlock::iterator I = Pred->begin();
+ !isa<TerminatorInst>(I); ++I)
+ if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control
+ // flow, so the xform is not worth it.
+ return false;
+ }
+ }
+
+ Pred = PN->getIncomingBlock(1);
+ if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) {
+ IfBlock2 = Pred;
+ DomBlock = *pred_begin(Pred);
+ for (BasicBlock::iterator I = Pred->begin();
+ !isa<TerminatorInst>(I); ++I)
+ if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control
+ // flow, so the xform is not worth it.
+ return false;
+ }
+ }
+
+ // If we can still promote the PHI nodes after this gauntlet of tests,
+ // do all of the PHI's now.
+
+ // Move all 'aggressive' instructions, which are defined in the
+ // conditional parts of the if's up to the dominating block.
+ if (IfBlock1) {
+ DomBlock->getInstList().splice(DomBlock->getTerminator(),
+ IfBlock1->getInstList(),
+ IfBlock1->begin(),
+ IfBlock1->getTerminator());
+ }
+ if (IfBlock2) {
+ DomBlock->getInstList().splice(DomBlock->getTerminator(),
+ IfBlock2->getInstList(),
+ IfBlock2->begin(),
+ IfBlock2->getTerminator());
+ }
+
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ // Change the PHI node into a select instruction.
+ Value *TrueVal =
+ PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
+ Value *FalseVal =
+ PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
+
+ Value *NV = SelectInst::Create(IfCond, TrueVal, FalseVal, "", AfterPHIIt);
+ PN->replaceAllUsesWith(NV);
+ NV->takeName(PN);
+
+ BB->getInstList().erase(PN);
+ }
+ return true;
+}
+
+/// isTerminatorFirstRelevantInsn - Return true if Term is very first
+/// instruction ignoring Phi nodes and dbg intrinsics.
+static bool isTerminatorFirstRelevantInsn(BasicBlock *BB, Instruction *Term) {
+ BasicBlock::iterator BBI = Term;
+ while (BBI != BB->begin()) {
+ --BBI;
+ if (!isa<DbgInfoIntrinsic>(BBI))
+ break;
+ }
+
+ if (isa<PHINode>(BBI) || &*BBI == Term || isa<DbgInfoIntrinsic>(BBI))
+ return true;
+ return false;
+}
+
+/// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes
+/// to two returning blocks, try to merge them together into one return,
+/// introducing a select if the return values disagree.
+static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
+ assert(BI->isConditional() && "Must be a conditional branch");
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+ ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
+ ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
+
+ // Check to ensure both blocks are empty (just a return) or optionally empty
+ // with PHI nodes. If there are other instructions, merging would cause extra
+ // computation on one path or the other.
+ if (!isTerminatorFirstRelevantInsn(TrueSucc, TrueRet))
+ return false;
+ if (!isTerminatorFirstRelevantInsn(FalseSucc, FalseRet))
+ return false;
+
+ // Okay, we found a branch that is going to two return nodes. If
+ // there is no return value for this function, just change the
+ // branch into a return.
+ if (FalseRet->getNumOperands() == 0) {
+ TrueSucc->removePredecessor(BI->getParent());
+ FalseSucc->removePredecessor(BI->getParent());
+ ReturnInst::Create(BI->getContext(), 0, BI);
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+ }
+
+ // Otherwise, figure out what the true and false return values are
+ // so we can insert a new select instruction.
+ Value *TrueValue = TrueRet->getReturnValue();
+ Value *FalseValue = FalseRet->getReturnValue();
+
+ // Unwrap any PHI nodes in the return blocks.
+ if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
+ if (TVPN->getParent() == TrueSucc)
+ TrueValue = TVPN->getIncomingValueForBlock(BI->getParent());
+ if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
+ if (FVPN->getParent() == FalseSucc)
+ FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
+
+ // In order for this transformation to be safe, we must be able to
+ // unconditionally execute both operands to the return. This is
+ // normally the case, but we could have a potentially-trapping
+ // constant expression that prevents this transformation from being
+ // safe.
+ if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue))
+ if (TCV->canTrap())
+ return false;
+ if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
+ if (FCV->canTrap())
+ return false;
+
+ // Okay, we collected all the mapped values and checked them for sanity, and
+ // defined to really do this transformation. First, update the CFG.
+ TrueSucc->removePredecessor(BI->getParent());
+ FalseSucc->removePredecessor(BI->getParent());
+
+ // Insert select instructions where needed.
+ Value *BrCond = BI->getCondition();
+ if (TrueValue) {
+ // Insert a select if the results differ.
+ if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) {
+ } else if (isa<UndefValue>(TrueValue)) {
+ TrueValue = FalseValue;
+ } else {
+ TrueValue = SelectInst::Create(BrCond, TrueValue,
+ FalseValue, "retval", BI);
+ }
+ }
+
+ Value *RI = !TrueValue ?
+ ReturnInst::Create(BI->getContext(), BI) :
+ ReturnInst::Create(BI->getContext(), TrueValue, BI);
+ (void) RI;
+
+ DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
+ << "\n " << *BI << "NewRet = " << *RI
+ << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);
+
+ EraseTerminatorInstAndDCECond(BI);
+
+ return true;
+}
+
+/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch,
+/// and if a predecessor branches to us and one of our successors, fold the
+/// setcc into the predecessor and use logical operations to pick the right
+/// destination.
+bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
+ BasicBlock *BB = BI->getParent();
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+ if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+ Cond->getParent() != BB || !Cond->hasOneUse())
+ return false;
+
+ // Only allow this if the condition is a simple instruction that can be
+ // executed unconditionally. It must be in the same block as the branch, and
+ // must be at the front of the block.
+ BasicBlock::iterator FrontIt = BB->front();
+ // Ignore dbg intrinsics.
+ while(isa<DbgInfoIntrinsic>(FrontIt))
+ ++FrontIt;
+
+ // Allow a single instruction to be hoisted in addition to the compare
+ // that feeds the branch. We later ensure that any values that _it_ uses
+ // were also live in the predecessor, so that we don't unnecessarily create
+ // register pressure or inhibit out-of-order execution.
+ Instruction *BonusInst = 0;
+ if (&*FrontIt != Cond &&
+ FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
+ FrontIt->isSafeToSpeculativelyExecute()) {
+ BonusInst = &*FrontIt;
+ ++FrontIt;
+ }
+
+ // Only a single bonus inst is allowed.
+ if (&*FrontIt != Cond)
+ return false;
+
+ // Make sure the instruction after the condition is the cond branch.
+ BasicBlock::iterator CondIt = Cond; ++CondIt;
+ // Ingore dbg intrinsics.
+ while(isa<DbgInfoIntrinsic>(CondIt))
+ ++CondIt;
+ if (&*CondIt != BI) {
+ assert (!isa<DbgInfoIntrinsic>(CondIt) && "Hey do not forget debug info!");
+ return false;
+ }
+
+ // Cond is known to be a compare or binary operator. Check to make sure that
+ // neither operand is a potentially-trapping constant expression.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
+ if (CE->canTrap())
+ return false;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
+ if (CE->canTrap())
+ return false;
+
+
+ // Finally, don't infinitely unroll conditional loops.
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *FalseDest = BI->getSuccessor(1);
+ if (TrueDest == BB || FalseDest == BB)
+ return false;
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBlock = *PI;
+ BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
+
+ // Check that we have two conditional branches. If there is a PHI node in
+ // the common successor, verify that the same value flows in from both
+ // blocks.
+ if (PBI == 0 || PBI->isUnconditional() ||
+ !SafeToMergeTerminators(BI, PBI))
+ continue;
+
+ // Ensure that any values used in the bonus instruction are also used
+ // by the terminator of the predecessor. This means that those values
+ // must already have been resolved, so we won't be inhibiting the
+ // out-of-order core by speculating them earlier.
+ if (BonusInst) {
+ // Collect the values used by the bonus inst
+ SmallPtrSet<Value*, 4> UsedValues;
+ for (Instruction::op_iterator OI = BonusInst->op_begin(),
+ OE = BonusInst->op_end(); OI != OE; ++OI) {
+ Value* V = *OI;
+ if (!isa<Constant>(V))
+ UsedValues.insert(V);
+ }
+
+ SmallVector<std::pair<Value*, unsigned>, 4> Worklist;
+ Worklist.push_back(std::make_pair(PBI->getOperand(0), 0));
+
+ // Walk up to four levels back up the use-def chain of the predecessor's
+ // terminator to see if all those values were used. The choice of four
+ // levels is arbitrary, to provide a compile-time-cost bound.
+ while (!Worklist.empty()) {
+ std::pair<Value*, unsigned> Pair = Worklist.back();
+ Worklist.pop_back();
+
+ if (Pair.second >= 4) continue;
+ UsedValues.erase(Pair.first);
+ if (UsedValues.empty()) break;
+
+ if (Instruction* I = dyn_cast<Instruction>(Pair.first)) {
+ for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
+ }
+ }
+
+ if (!UsedValues.empty()) return false;
+ }
+
+ Instruction::BinaryOps Opc;
+ bool InvertPredCond = false;
+
+ if (PBI->getSuccessor(0) == TrueDest)
+ Opc = Instruction::Or;
+ else if (PBI->getSuccessor(1) == FalseDest)
+ Opc = Instruction::And;
+ else if (PBI->getSuccessor(0) == FalseDest)
+ Opc = Instruction::And, InvertPredCond = true;
+ else if (PBI->getSuccessor(1) == TrueDest)
+ Opc = Instruction::Or, InvertPredCond = true;
+ else
+ continue;
+
+ DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
+
+ // If we need to invert the condition in the pred block to match, do so now.
+ if (InvertPredCond) {
+ Value *NewCond =
+ BinaryOperator::CreateNot(PBI->getCondition(),
+ PBI->getCondition()->getName()+".not", PBI);
+ PBI->setCondition(NewCond);
+ BasicBlock *OldTrue = PBI->getSuccessor(0);
+ BasicBlock *OldFalse = PBI->getSuccessor(1);
+ PBI->setSuccessor(0, OldFalse);
+ PBI->setSuccessor(1, OldTrue);
+ }
+
+ // If we have a bonus inst, clone it into the predecessor block.
+ Instruction *NewBonus = 0;
+ if (BonusInst) {
+ NewBonus = BonusInst->clone();
+ PredBlock->getInstList().insert(PBI, NewBonus);
+ NewBonus->takeName(BonusInst);
+ BonusInst->setName(BonusInst->getName()+".old");
+ }
+
+ // Clone Cond into the predecessor basic block, and or/and the
+ // two conditions together.
+ Instruction *New = Cond->clone();
+ if (BonusInst) New->replaceUsesOfWith(BonusInst, NewBonus);
+ PredBlock->getInstList().insert(PBI, New);
+ New->takeName(Cond);
+ Cond->setName(New->getName()+".old");
+
+ Value *NewCond = BinaryOperator::Create(Opc, PBI->getCondition(),
+ New, "or.cond", PBI);
+ PBI->setCondition(NewCond);
+ if (PBI->getSuccessor(0) == BB) {
+ AddPredecessorToBlock(TrueDest, PredBlock, BB);
+ PBI->setSuccessor(0, TrueDest);
+ }
+ if (PBI->getSuccessor(1) == BB) {
+ AddPredecessorToBlock(FalseDest, PredBlock, BB);
+ PBI->setSuccessor(1, FalseDest);
+ }
+ return true;
+ }
+ return false;
+}
+
+/// SimplifyCondBranchToCondBranch - If we have a conditional branch as a
+/// predecessor of another block, this function tries to simplify it. We know
+/// that PBI and BI are both conditional branches, and BI is in one of the
+/// successor blocks of PBI - PBI branches to BI.
+static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+ assert(PBI->isConditional() && BI->isConditional());
+ BasicBlock *BB = BI->getParent();
+
+ // If this block ends with a branch instruction, and if there is a
+ // predecessor that ends on a branch of the same condition, make
+ // this conditional branch redundant.
+ if (PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ // Okay, the outcome of this conditional branch is statically
+ // knowable. If this block had a single pred, handle specially.
+ if (BB->getSinglePredecessor()) {
+ // Turn this into a branch on constant.
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ CondIsTrue));
+ return true; // Nuke the branch on constant.
+ }
+
+ // Otherwise, if there are multiple predecessors, insert a PHI that merges
+ // in the constant and simplify the block result. Subsequent passes of
+ // simplifycfg will thread the block.
+ if (BlockIsSimpleEnoughToThreadThrough(BB)) {
+ PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
+ BI->getCondition()->getName() + ".pr",
+ BB->begin());
+ // Okay, we're going to insert the PHI node. Since PBI is not the only
+ // predecessor, compute the PHI'd conditional value for all of the preds.
+ // Any predecessor where the condition is not computable we keep symbolic.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) &&
+ PBI != BI && PBI->isConditional() &&
+ PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ CondIsTrue), P);
+ } else {
+ NewPN->addIncoming(BI->getCondition(), P);
+ }
+ }
+
+ BI->setCondition(NewPN);
+ return true;
+ }
+ }
+
+ // If this is a conditional branch in an empty block, and if any
+ // predecessors is a conditional branch to one of our destinations,
+ // fold the conditions into logical ops and one cond br.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (&*BBI != BI)
+ return false;
+
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
+ if (CE->canTrap())
+ return false;
+
+ int PBIOp, BIOp;
+ if (PBI->getSuccessor(0) == BI->getSuccessor(0))
+ PBIOp = BIOp = 0;
+ else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
+ PBIOp = 0, BIOp = 1;
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
+ PBIOp = 1, BIOp = 0;
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
+ PBIOp = BIOp = 1;
+ else
+ return false;
+
+ // Check to make sure that the other destination of this branch
+ // isn't BB itself. If so, this is an infinite loop that will
+ // keep getting unwound.
+ if (PBI->getSuccessor(PBIOp) == BB)
+ return false;
+
+ // Do not perform this transformation if it would require
+ // insertion of a large number of select instructions. For targets
+ // without predication/cmovs, this is a big pessimization.
+ BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
+
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator II = CommonDest->begin();
+ isa<PHINode>(II); ++II, ++NumPhis)
+ if (NumPhis > 2) // Disable this xform.
+ return false;
+
+ // Finally, if everything is ok, fold the branches to logical ops.
+ BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
+
+ DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
+ << "AND: " << *BI->getParent());
+
+
+ // If OtherDest *is* BB, then BB is a basic block with a single conditional
+ // branch in it, where one edge (OtherDest) goes back to itself but the other
+ // exits. We don't *know* that the program avoids the infinite loop
+ // (even though that seems likely). If we do this xform naively, we'll end up
+ // recursively unpeeling the loop. Since we know that (after the xform is
+ // done) that the block *is* infinite if reached, we just make it an obviously
+ // infinite loop with no cond branch.
+ if (OtherDest == BB) {
+ // Insert it at the end of the function, because it's either code,
+ // or it won't matter if it's hot. :)
+ BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(),
+ "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ OtherDest = InfLoopBlock;
+ }
+
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // BI may have other predecessors. Because of this, we leave
+ // it alone, but modify PBI.
+
+ // Make sure we get to CommonDest on True&True directions.
+ Value *PBICond = PBI->getCondition();
+ if (PBIOp)
+ PBICond = BinaryOperator::CreateNot(PBICond,
+ PBICond->getName()+".not",
+ PBI);
+ Value *BICond = BI->getCondition();
+ if (BIOp)
+ BICond = BinaryOperator::CreateNot(BICond,
+ BICond->getName()+".not",
+ PBI);
+ // Merge the conditions.
+ Value *Cond = BinaryOperator::CreateOr(PBICond, BICond, "brmerge", PBI);
+
+ // Modify PBI to branch on the new condition to the new dests.
+ PBI->setCondition(Cond);
+ PBI->setSuccessor(0, CommonDest);
+ PBI->setSuccessor(1, OtherDest);
+
+ // OtherDest may have phi nodes. If so, add an entry from PBI's
+ // block that are identical to the entries for BI's block.
+ PHINode *PN;
+ for (BasicBlock::iterator II = OtherDest->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ Value *V = PN->getIncomingValueForBlock(BB);
+ PN->addIncoming(V, PBI->getParent());
+ }
+
+ // We know that the CommonDest already had an edge from PBI to
+ // it. If it has PHIs though, the PHIs may have different
+ // entries for BB and PBI's BB. If so, insert a select to make
+ // them agree.
+ for (BasicBlock::iterator II = CommonDest->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ Value *BIV = PN->getIncomingValueForBlock(BB);
+ unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
+ Value *PBIV = PN->getIncomingValue(PBBIdx);
+ if (BIV != PBIV) {
+ // Insert a select in PBI to pick the right value.
+ Value *NV = SelectInst::Create(PBICond, PBIV, BIV,
+ PBIV->getName()+".mux", PBI);
+ PN->setIncomingValue(PBBIdx, NV);
+ }
+ }
+
+ DEBUG(dbgs() << "INTO: " << *PBI->getParent());
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // This basic block is probably dead. We know it has at least
+ // one fewer predecessor.
+ return true;
+}
+
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+ bool Changed = false;
+ Function *M = BB->getParent();
+
+ assert(BB && BB->getParent() && "Block not embedded in function!");
+ assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+ // Remove basic blocks that have no predecessors (except the entry block)...
+ // or that just have themself as a predecessor. These are unreachable.
+ if ((pred_begin(BB) == pred_end(BB) &&
+ &BB->getParent()->getEntryBlock() != BB) ||
+ BB->getSinglePredecessor() == BB) {
+ DEBUG(dbgs() << "Removing BB: \n" << *BB);
+ DeleteDeadBlock(BB);
+ return true;
+ }
+
+ // Check to see if we can constant propagate this terminator instruction
+ // away...
+ Changed |= ConstantFoldTerminator(BB);
+
+ // Check for and eliminate duplicate PHI nodes in this block.
+ Changed |= EliminateDuplicatePHINodes(BB);
+
+ // If there is a trivial two-entry PHI node in this basic block, and we can
+ // eliminate it, do so now.
+ if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
+ if (PN->getNumIncomingValues() == 2)
+ Changed |= FoldTwoEntryPHINode(PN);
+
+ // If this is a returning block with only PHI nodes in it, fold the return
+ // instruction into any unconditional branch predecessors.
+ //
+ // If any predecessor is a conditional branch that just selects among
+ // different return values, fold the replace the branch/return with a select
+ // and return.
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ if (isTerminatorFirstRelevantInsn(BB, BB->getTerminator())) {
+ // Find predecessors that end with branches.
+ SmallVector<BasicBlock*, 8> UncondBranchPreds;
+ SmallVector<BranchInst*, 8> CondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ TerminatorInst *PTI = P->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(P);
+ else
+ CondBranchPreds.push_back(BI);
+ }
+ }
+
+ // If we found some, do the transformation!
+ if (!UncondBranchPreds.empty()) {
+ while (!UncondBranchPreds.empty()) {
+ BasicBlock *Pred = UncondBranchPreds.pop_back_val();
+ DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
+ Instruction *UncondBranch = Pred->getTerminator();
+ // Clone the return and add it to the end of the predecessor.
+ Instruction *NewRet = RI->clone();
+ Pred->getInstList().push_back(NewRet);
+
+ // If the return instruction returns a value, and if the value was a
+ // PHI node in "BB", propagate the right value into the return.
+ for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
+ i != e; ++i)
+ if (PHINode *PN = dyn_cast<PHINode>(*i))
+ if (PN->getParent() == BB)
+ *i = PN->getIncomingValueForBlock(Pred);
+
+ // Update any PHI nodes in the returning block to realize that we no
+ // longer branch to them.
+ BB->removePredecessor(Pred);
+ Pred->getInstList().erase(UncondBranch);
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (pred_begin(BB) == pred_end(BB))
+ // We know there are no successors, so just nuke the block.
+ M->getBasicBlockList().erase(BB);
+
+ return true;
+ }
+
+ // Check out all of the conditional branches going to this return
+ // instruction. If any of them just select between returns, change the
+ // branch itself into a select/return pair.
+ while (!CondBranchPreds.empty()) {
+ BranchInst *BI = CondBranchPreds.pop_back_val();
+
+ // Check to see if the non-BB successor is also a return block.
+ if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
+ isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
+ SimplifyCondBranchToTwoReturns(BI))
+ return true;
+ }
+ }
+ } else if (isa<UnwindInst>(BB->begin())) {
+ // Check to see if the first instruction in this block is just an unwind.
+ // If so, replace any invoke instructions which use this as an exception
+ // destination with call instructions.
+ //
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.back();
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
+ if (II->getUnwindDest() == BB) {
+ // Insert a new branch instruction before the invoke, because this
+ // is now a fall through.
+ BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
+ Pred->getInstList().remove(II); // Take out of symbol table
+
+ // Insert the call now.
+ SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
+ CallInst *CI = CallInst::Create(II->getCalledValue(),
+ Args.begin(), Args.end(),
+ II->getName(), BI);
+ CI->setCallingConv(II->getCallingConv());
+ CI->setAttributes(II->getAttributes());
+ // If the invoke produced a value, the Call now does instead.
+ II->replaceAllUsesWith(CI);
+ delete II;
+ Changed = true;
+ }
+
+ Preds.pop_back();
+ }
+
+ // If this block is now dead, remove it.
+ if (pred_begin(BB) == pred_end(BB)) {
+ // We know there are no successors, so just nuke the block.
+ M->getBasicBlockList().erase(BB);
+ return true;
+ }
+
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ if (isValueEqualityComparison(SI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred))
+ return SimplifyCFG(BB) || 1;
+
+ // If the block only contains the switch, see if we can fold the block
+ // away into any preds.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (SI == &*BBI)
+ if (FoldValueComparisonIntoPredecessors(SI))
+ return SimplifyCFG(BB) || 1;
+ }
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ if (BI->isUnconditional()) {
+ BasicBlock::iterator BBI = BB->getFirstNonPHI();
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (BBI->isTerminator()) // Terminator is the only non-phi instruction!
+ if (BB != &BB->getParent()->getEntryBlock())
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ return true;
+
+ } else { // Conditional branch
+ if (isValueEqualityComparison(BI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this
+ // switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred))
+ return SimplifyCFG(BB) | true;
+
+ // This block must be empty, except for the setcond inst, if it exists.
+ // Ignore dbg intrinsics.
+ BasicBlock::iterator I = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI) {
+ if (FoldValueComparisonIntoPredecessors(BI))
+ return SimplifyCFG(BB) | true;
+ } else if (&*I == cast<Instruction>(BI->getCondition())){
+ ++I;
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if(&*I == BI) {
+ if (FoldValueComparisonIntoPredecessors(BI))
+ return SimplifyCFG(BB) | true;
+ }
+ }
+ }
+
+ // If this is a branch on a phi node in the current block, thread control
+ // through this block if any PHI node entries are constants.
+ if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
+ if (PN->getParent() == BI->getParent())
+ if (FoldCondBranchOnPHI(BI))
+ return SimplifyCFG(BB) | true;
+
+ // If this basic block is ONLY a setcc and a branch, and if a predecessor
+ // branches to us and one of our successors, fold the setcc into the
+ // predecessor and use logical operations to pick the right destination.
+ if (FoldBranchToCommonDest(BI))
+ return SimplifyCFG(BB) | true;
+
+
+ // Scan predecessor blocks for conditional branches.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI != BI && PBI->isConditional())
+ if (SimplifyCondBranchToCondBranch(PBI, BI))
+ return SimplifyCFG(BB) | true;
+ }
+ } else if (isa<UnreachableInst>(BB->getTerminator())) {
+ // If there are any instructions immediately before the unreachable that can
+ // be removed, do so.
+ Instruction *Unreachable = BB->getTerminator();
+ while (Unreachable != BB->begin()) {
+ BasicBlock::iterator BBI = Unreachable;
+ --BBI;
+ // Do not delete instructions that can have side effects, like calls
+ // (which may never return) and volatile loads and stores.
+ if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+ if (SI->isVolatile())
+ break;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
+ if (LI->isVolatile())
+ break;
+
+ // Delete this instruction
+ BB->getInstList().erase(BBI);
+ Changed = true;
+ }
+
+ // If the unreachable instruction is the first in the block, take a gander
+ // at all of the predecessors of this instruction, and simplify them.
+ if (&BB->front() == Unreachable) {
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ TerminatorInst *TI = Preds[i]->getTerminator();
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional()) {
+ if (BI->getSuccessor(0) == BB) {
+ new UnreachableInst(TI->getContext(), TI);
+ TI->eraseFromParent();
+ Changed = true;
+ }
+ } else {
+ if (BI->getSuccessor(0) == BB) {
+ BranchInst::Create(BI->getSuccessor(1), BI);
+ EraseTerminatorInstAndDCECond(BI);
+ } else if (BI->getSuccessor(1) == BB) {
+ BranchInst::Create(BI->getSuccessor(0), BI);
+ EraseTerminatorInstAndDCECond(BI);
+ Changed = true;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ if (SI->getSuccessor(i) == BB) {
+ BB->removePredecessor(SI->getParent());
+ SI->removeCase(i);
+ --i; --e;
+ Changed = true;
+ }
+ // If the default value is unreachable, figure out the most popular
+ // destination and make it the default.
+ if (SI->getSuccessor(0) == BB) {
+ std::map<BasicBlock*, unsigned> Popularity;
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ Popularity[SI->getSuccessor(i)]++;
+
+ // Find the most popular block.
+ unsigned MaxPop = 0;
+ BasicBlock *MaxBlock = 0;
+ for (std::map<BasicBlock*, unsigned>::iterator
+ I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
+ if (I->second > MaxPop) {
+ MaxPop = I->second;
+ MaxBlock = I->first;
+ }
+ }
+ if (MaxBlock) {
+ // Make this the new default, allowing us to delete any explicit
+ // edges to it.
+ SI->setSuccessor(0, MaxBlock);
+ Changed = true;
+
+ // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
+ // it.
+ if (isa<PHINode>(MaxBlock->begin()))
+ for (unsigned i = 0; i != MaxPop-1; ++i)
+ MaxBlock->removePredecessor(SI->getParent());
+
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ if (SI->getSuccessor(i) == MaxBlock) {
+ SI->removeCase(i);
+ --i; --e;
+ }
+ }
+ }
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
+ if (II->getUnwindDest() == BB) {
+ // Convert the invoke to a call instruction. This would be a good
+ // place to note that the call does not throw though.
+ BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
+ II->removeFromParent(); // Take out of symbol table
+
+ // Insert the call now...
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
+ CallInst *CI = CallInst::Create(II->getCalledValue(),
+ Args.begin(), Args.end(),
+ II->getName(), BI);
+ CI->setCallingConv(II->getCallingConv());
+ CI->setAttributes(II->getAttributes());
+ // If the invoke produced a value, the call does now instead.
+ II->replaceAllUsesWith(CI);
+ delete II;
+ Changed = true;
+ }
+ }
+ }
+
+ // If this block is now dead, remove it.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ // We know there are no successors, so just nuke the block.
+ M->getBasicBlockList().erase(BB);
+ return true;
+ }
+ }
+ } else if (IndirectBrInst *IBI =
+ dyn_cast<IndirectBrInst>(BB->getTerminator())) {
+ // Eliminate redundant destinations.
+ SmallPtrSet<Value *, 8> Succs;
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ BasicBlock *Dest = IBI->getDestination(i);
+ if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
+ Dest->removePredecessor(BB);
+ IBI->removeDestination(i);
+ --i; --e;
+ Changed = true;
+ }
+ }
+
+ if (IBI->getNumDestinations() == 0) {
+ // If the indirectbr has no successors, change it to unreachable.
+ new UnreachableInst(IBI->getContext(), IBI);
+ IBI->eraseFromParent();
+ Changed = true;
+ } else if (IBI->getNumDestinations() == 1) {
+ // If the indirectbr has one successor, change it to a direct branch.
+ BranchInst::Create(IBI->getDestination(0), IBI);
+ IBI->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ //
+ if (MergeBlockIntoPredecessor(BB))
+ return true;
+
+ // Otherwise, if this block only has a single predecessor, and if that block
+ // is a conditional branch, see if we can hoist any code from this block up
+ // into our predecessor.
+ pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
+ BasicBlock *OnlyPred = 0;
+ for (; PI != PE; ++PI) { // Search all predecessors, see if they are all same
+ if (!OnlyPred)
+ OnlyPred = *PI;
+ else if (*PI != OnlyPred) {
+ OnlyPred = 0; // There are multiple different predecessors...
+ break;
+ }
+ }
+
+ if (OnlyPred)
+ if (BranchInst *BI = dyn_cast<BranchInst>(OnlyPred->getTerminator()))
+ if (BI->isConditional()) {
+ // Get the other block.
+ BasicBlock *OtherBB = BI->getSuccessor(BI->getSuccessor(0) == BB);
+ PI = pred_begin(OtherBB);
+ ++PI;
+
+ if (PI == pred_end(OtherBB)) {
+ // We have a conditional branch to two blocks that are only reachable
+ // from the condbr. We know that the condbr dominates the two blocks,
+ // so see if there is any identical code in the "then" and "else"
+ // blocks. If so, we can hoist it up to the branching block.
+ Changed |= HoistThenElseCodeToIf(BI);
+ } else {
+ BasicBlock* OnlySucc = NULL;
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+ if (!OnlySucc)
+ OnlySucc = *SI;
+ else if (*SI != OnlySucc) {
+ OnlySucc = 0; // There are multiple distinct successors!
+ break;
+ }
+ }
+
+ if (OnlySucc == OtherBB) {
+ // If BB's only successor is the other successor of the predecessor,
+ // i.e. a triangle, see if we can hoist any code from this block up
+ // to the "if" block.
+ Changed |= SpeculativelyExecuteBB(BI, BB);
+ }
+ }
+ }
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (BranchInst *BI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ // Change br (X == 0 | X == 1), T, F into a switch instruction.
+ if (BI->isConditional() && isa<Instruction>(BI->getCondition())) {
+ Instruction *Cond = cast<Instruction>(BI->getCondition());
+ // If this is a bunch of seteq's or'd together, or if it's a bunch of
+ // 'setne's and'ed together, collect them.
+ Value *CompVal = 0;
+ std::vector<ConstantInt*> Values;
+ bool TrueWhenEqual = GatherValueComparisons(Cond, CompVal, Values);
+ if (CompVal) {
+ // There might be duplicate constants in the list, which the switch
+ // instruction can't handle, remove them now.
+ std::sort(Values.begin(), Values.end(), ConstantIntOrdering());
+ Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+
+ // Figure out which block is which destination.
+ BasicBlock *DefaultBB = BI->getSuccessor(1);
+ BasicBlock *EdgeBB = BI->getSuccessor(0);
+ if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
+
+ // Convert pointer to int before we switch.
+ if (CompVal->getType()->isPointerTy()) {
+ assert(TD && "Cannot switch on pointer without TargetData");
+ CompVal = new PtrToIntInst(CompVal,
+ TD->getIntPtrType(CompVal->getContext()),
+ "magicptr", BI);
+ }
+
+ // Create the new switch instruction now.
+ SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB,
+ Values.size(), BI);
+
+ // Add all of the 'cases' to the switch instruction.
+ for (unsigned i = 0, e = Values.size(); i != e; ++i)
+ New->addCase(Values[i], EdgeBB);
+
+ // We added edges from PI to the EdgeBB. As such, if there were any
+ // PHI nodes in EdgeBB, they need entries to be added corresponding to
+ // the number of edges added.
+ for (BasicBlock::iterator BBI = EdgeBB->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ Value *InVal = PN->getIncomingValueForBlock(*PI);
+ for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
+ PN->addIncoming(InVal, *PI);
+ }
+
+ // Erase the old branch instruction.
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+ }
+ }
+
+ return Changed;
+}
+
+/// SimplifyCFG - This function is used to do simplification of a CFG. For
+/// example, it adjusts branches to branches to eliminate the extra hop, it
+/// eliminates unreachable basic blocks, and does other "peephole" optimization
+/// of the CFG. It returns true if a modification was made.
+///
+bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) {
+ return SimplifyCFGOpt(TD).run(BB);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
new file mode 100644
index 0000000..a51f1e1
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -0,0 +1,141 @@
+//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to ensure that functions have at most one return
+// instruction in them. Additionally, it keeps track of which node is the new
+// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode
+// method will return a null pointer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+char UnifyFunctionExitNodes::ID = 0;
+INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
+ "Unify function exit nodes", false, false);
+
+Pass *llvm::createUnifyFunctionExitNodesPass() {
+ return new UnifyFunctionExitNodes();
+}
+
+void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
+ // We preserve the non-critical-edgeness property
+ AU.addPreservedID(BreakCriticalEdgesID);
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerSwitchID);
+}
+
+// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new
+// BasicBlock, and converting all returns to unconditional branches to this
+// new basic block. The singular exit node is returned.
+//
+// If there are no return stmts in the Function, a null pointer is returned.
+//
+bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
+ // Loop over all of the blocks in a function, tracking all of the blocks that
+ // return.
+ //
+ std::vector<BasicBlock*> ReturningBlocks;
+ std::vector<BasicBlock*> UnwindingBlocks;
+ std::vector<BasicBlock*> UnreachableBlocks;
+ for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (isa<ReturnInst>(I->getTerminator()))
+ ReturningBlocks.push_back(I);
+ else if (isa<UnwindInst>(I->getTerminator()))
+ UnwindingBlocks.push_back(I);
+ else if (isa<UnreachableInst>(I->getTerminator()))
+ UnreachableBlocks.push_back(I);
+
+ // Handle unwinding blocks first.
+ if (UnwindingBlocks.empty()) {
+ UnwindBlock = 0;
+ } else if (UnwindingBlocks.size() == 1) {
+ UnwindBlock = UnwindingBlocks.front();
+ } else {
+ UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F);
+ new UnwindInst(F.getContext(), UnwindBlock);
+
+ for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(),
+ E = UnwindingBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ BB->getInstList().pop_back(); // Remove the unwind insn
+ BranchInst::Create(UnwindBlock, BB);
+ }
+ }
+
+ // Then unreachable blocks.
+ if (UnreachableBlocks.empty()) {
+ UnreachableBlock = 0;
+ } else if (UnreachableBlocks.size() == 1) {
+ UnreachableBlock = UnreachableBlocks.front();
+ } else {
+ UnreachableBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedUnreachableBlock", &F);
+ new UnreachableInst(F.getContext(), UnreachableBlock);
+
+ for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
+ E = UnreachableBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ BB->getInstList().pop_back(); // Remove the unreachable inst.
+ BranchInst::Create(UnreachableBlock, BB);
+ }
+ }
+
+ // Now handle return blocks.
+ if (ReturningBlocks.empty()) {
+ ReturnBlock = 0;
+ return false; // No blocks return
+ } else if (ReturningBlocks.size() == 1) {
+ ReturnBlock = ReturningBlocks.front(); // Already has a single return block
+ return false;
+ }
+
+ // Otherwise, we need to insert a new basic block into the function, add a PHI
+ // nodes (if the function returns values), and convert all of the return
+ // instructions into unconditional branches.
+ //
+ BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedReturnBlock", &F);
+
+ PHINode *PN = 0;
+ if (F.getReturnType()->isVoidTy()) {
+ ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
+ } else {
+ // If the function doesn't return void... add a PHI node to the block...
+ PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal");
+ NewRetBlock->getInstList().push_back(PN);
+ ReturnInst::Create(F.getContext(), PN, NewRetBlock);
+ }
+
+ // Loop over all of the blocks, replacing the return instruction with an
+ // unconditional branch.
+ //
+ for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(),
+ E = ReturningBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+
+ // Add an incoming element to the PHI node for every return instruction that
+ // is merging into this new block...
+ if (PN)
+ PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
+
+ BB->getInstList().pop_back(); // Remove the return insn
+ BranchInst::Create(NewRetBlock, BB);
+ }
+ ReturnBlock = NewRetBlock;
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
new file mode 100644
index 0000000..fc4bde7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -0,0 +1,177 @@
+//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MapValue function, which is shared by various parts of
+// the lib/Transforms/Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Type.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Metadata.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
+ bool ModuleLevelChanges) {
+ Value *&VMSlot = VM[V];
+ if (VMSlot) return VMSlot; // Does it exist in the map yet?
+
+ // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the
+ // DenseMap. This includes any recursive calls to MapValue.
+
+ // Global values do not need to be seeded into the VM if they
+ // are using the identity mapping.
+ if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) ||
+ (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal() &&
+ !ModuleLevelChanges))
+ return VMSlot = const_cast<Value*>(V);
+
+ if (const MDNode *MD = dyn_cast<MDNode>(V)) {
+ // Start by assuming that we'll use the identity mapping.
+ VMSlot = const_cast<Value*>(V);
+
+ // Check all operands to see if any need to be remapped.
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
+ Value *OP = MD->getOperand(i);
+ if (!OP || MapValue(OP, VM, ModuleLevelChanges) == OP) continue;
+
+ // Ok, at least one operand needs remapping.
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0);
+ VM[V] = Dummy;
+ SmallVector<Value*, 4> Elts;
+ Elts.reserve(MD->getNumOperands());
+ for (i = 0; i != e; ++i)
+ Elts.push_back(MD->getOperand(i) ?
+ MapValue(MD->getOperand(i), VM, ModuleLevelChanges) : 0);
+ MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size());
+ Dummy->replaceAllUsesWith(NewMD);
+ MDNode::deleteTemporary(Dummy);
+ return VM[V] = NewMD;
+ }
+
+ // No operands needed remapping; keep the identity map.
+ return const_cast<Value*>(V);
+ }
+
+ Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
+ if (C == 0)
+ return 0;
+
+ if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
+ isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
+ isa<UndefValue>(C))
+ return VMSlot = C; // Primitive constants map directly
+
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
+ for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
+ i != e; ++i) {
+ Value *MV = MapValue(*i, VM, ModuleLevelChanges);
+ if (MV != *i) {
+ // This array must contain a reference to a global, make a new array
+ // and return it.
+ //
+ std::vector<Constant*> Values;
+ Values.reserve(CA->getNumOperands());
+ for (User::op_iterator j = b; j != i; ++j)
+ Values.push_back(cast<Constant>(*j));
+ Values.push_back(cast<Constant>(MV));
+ for (++i; i != e; ++i)
+ Values.push_back(cast<Constant>(MapValue(*i, VM,
+ ModuleLevelChanges)));
+ return VM[V] = ConstantArray::get(CA->getType(), Values);
+ }
+ }
+ return VM[V] = C;
+ }
+
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+ for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
+ i != e; ++i) {
+ Value *MV = MapValue(*i, VM, ModuleLevelChanges);
+ if (MV != *i) {
+ // This struct must contain a reference to a global, make a new struct
+ // and return it.
+ //
+ std::vector<Constant*> Values;
+ Values.reserve(CS->getNumOperands());
+ for (User::op_iterator j = b; j != i; ++j)
+ Values.push_back(cast<Constant>(*j));
+ Values.push_back(cast<Constant>(MV));
+ for (++i; i != e; ++i)
+ Values.push_back(cast<Constant>(MapValue(*i, VM,
+ ModuleLevelChanges)));
+ return VM[V] = ConstantStruct::get(CS->getType(), Values);
+ }
+ }
+ return VM[V] = C;
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ std::vector<Constant*> Ops;
+ for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
+ Ops.push_back(cast<Constant>(MapValue(*i, VM, ModuleLevelChanges)));
+ return VM[V] = CE->getWithOperands(Ops);
+ }
+
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+ for (User::op_iterator b = CV->op_begin(), i = b, e = CV->op_end();
+ i != e; ++i) {
+ Value *MV = MapValue(*i, VM, ModuleLevelChanges);
+ if (MV != *i) {
+ // This vector value must contain a reference to a global, make a new
+ // vector constant and return it.
+ //
+ std::vector<Constant*> Values;
+ Values.reserve(CV->getNumOperands());
+ for (User::op_iterator j = b; j != i; ++j)
+ Values.push_back(cast<Constant>(*j));
+ Values.push_back(cast<Constant>(MV));
+ for (++i; i != e; ++i)
+ Values.push_back(cast<Constant>(MapValue(*i, VM,
+ ModuleLevelChanges)));
+ return VM[V] = ConstantVector::get(Values);
+ }
+ }
+ return VM[V] = C;
+ }
+
+ BlockAddress *BA = cast<BlockAddress>(C);
+ Function *F = cast<Function>(MapValue(BA->getFunction(), VM,
+ ModuleLevelChanges));
+ BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM,
+ ModuleLevelChanges));
+ return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
+}
+
+/// RemapInstruction - Convert the instruction operands from referencing the
+/// current values into those specified by VMap.
+///
+void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges) {
+ // Remap operands.
+ for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
+ Value *V = MapValue(*op, VMap, ModuleLevelChanges);
+ assert(V && "Referenced value not in value map!");
+ *op = V;
+ }
+
+ // Remap attached metadata.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ I->getAllMetadata(MDs);
+ for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
+ MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
+ Value *Old = MI->second;
+ Value *New = MapValue(Old, VMap, ModuleLevelChanges);
+ if (New != Old)
+ I->setMetadata(MI->first, cast<MDNode>(New));
+ }
+}
OpenPOWER on IntegriCloud